Esempio n. 1
0
        /// <summary>
        /// Initializes the parser: builds the movie-page URL pattern, creates the shared
        /// AngleSharp HTML parser and fills the two lookup tables that map a
        /// <see cref="RequiredValueType"/> to a function extracting that value from raw HTML.
        /// </summary>
        /// <remarks>
        /// Extractors for values that are not supported yet throw
        /// <see cref="NotImplementedException"/> when they are invoked.
        /// </remarks>
        public Parser()
        {
            moviePageUrl = new Regex(@"^/film/\d+/$");
            htmlParser   = new AngleSharp.Parser.Html.HtmlParser();

            // Extractors that operate on the HTML of a full movie page.
            MoviePageParser = new Dictionary <RequiredValueType, Func <string, string> > {
                [RequiredValueType.Country] = x => { throw new NotImplementedException(); },
                [RequiredValueType.Genre]   = x => { throw new NotImplementedException(); },
                [RequiredValueType.Name]    = x => {
                    var dom     = htmlParser.Parse(x);
                    var element = dom
                                  .QuerySelector("div#headerFilm>h1.moviename-big");

                    // When the heading holds plain text only, markup and text are identical.
                    if (element.InnerHtml == element.TextContent)
                    {
                        return(element.TextContent);
                    }

                    // Otherwise drop everything from the first " <" onwards (nested markup).
                    return(Regex.Replace(element.InnerHtml, " <.*", ""));
                },
                [RequiredValueType.Producer] = x => { throw new NotImplementedException(); },
                [RequiredValueType.Rate]     = x => { throw new NotImplementedException(); },
                [RequiredValueType.Year]     = x => {
                    var dom = htmlParser.Parse(x);

                    // Tag names are compared case-insensitively so the lookup works
                    // regardless of the casing the DOM reports for element names.
                    var labelCell = dom.All
                                    .First(m => string.Equals(m.TagName, "td", StringComparison.OrdinalIgnoreCase) &&
                                           m.ClassName == "type" &&
                                           m.TextContent == "год");

                    // The value sits in the first anchor of the cell following the label cell.
                    return(labelCell
                           .NextElementSibling
                           .Children
                           .First(m => string.Equals(m.TagName, "a", StringComparison.OrdinalIgnoreCase))
                           .TextContent);
                },
            };

            // Extractors that operate on the HTML of a single search-result entry.
            SearchResultEntryParser = new Dictionary <RequiredValueType, Func <string, string> > {
                [RequiredValueType.Country] = x => {
                    var dom = htmlParser.Parse(x);
                    return(dom
                           .QuerySelector("span.grey+span.grey")
                           .TextContent);
                },
                [RequiredValueType.Genre] = x => { throw new NotImplementedException(); },
                [RequiredValueType.Name]  = x => {
                    var dom = htmlParser.Parse(x);
                    return(dom
                           .QuerySelector("p.name>a")
                           .TextContent);
                },
                [RequiredValueType.Producer] = x => { throw new NotImplementedException(); },
                [RequiredValueType.Rate]     = x => { throw new NotImplementedException(); },
                [RequiredValueType.Year]     = x => {
                    var dom = htmlParser.Parse(x);
                    return(dom
                           .QuerySelector("p.name>span.year")
                           .TextContent);
                },
            };
        }
Esempio n. 2
0
        /// <summary>
        /// Downloads the page addressed by <c>GetUrlLink()</c> and converts every
        /// <c>h2</c> element carrying the <c>serp-item__title</c> class into a
        /// <see cref="VideoHosting"/> entry.
        /// </summary>
        /// <returns>One entry per matching heading.</returns>
        public override List <VideoHosting> Parse()
        {
            var    parser = new AngleSharp.Parser.Html.HtmlParser();
            string htmlSourse;

            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                htmlSourse      = client.DownloadString(GetUrlLink());
            }

            IHtmlDocument document = parser.Parse(htmlSourse);

            var result = document
                         .GetElementsByTagName("h2")                           // all h2 tags of the page
                         .Where(t => t.ClassList.Contains("serp-item__title")) // keep those with the serp-item__title class
                         .Select(t => new VideoHosting                         // project into the result model
                         {
                             NameVideo   = ExcludeTagB(t.InnerHtml),
                             NameHosting = HostNames.Yandex,
                             Time        = DateTime.Now
                         })
                         .ToList(); // materialize as a list

            return(result);
        }
Esempio n. 3
0
        ////////////////////////Files/////////////////////////////


        ///////////////////////Parser///////////////////////////////

        /// <summary>
        /// Walks the files returned by <c>getFiles()</c>; every file for which
        /// <c>eNewName</c> yields a non-empty name is read and parsed as HTML.
        /// </summary>
        /// <remarks>
        /// Work in progress: the first table of the document is located, but nothing
        /// is removed from it or written back yet.
        /// </remarks>
        public void runParser()
        {
            // Fetch the files required for parsing and processing.
            foreach (string file in getFiles())
            {
                // 1. Build the new file name (used when publishing to the site).
                var newName = this.eNewName(file);
                if (newName != "")
                {
                    // 2. Read the file content. ReadAllBytes reads the file completely and
                    //    releases the handle even when reading fails.
                    byte[] array = File.ReadAllBytes(file);
                    // Decode the bytes with the system default encoding.
                    string html = System.Text.Encoding.Default.GetString(array);

                    // 3. Parse the HTML.
                    var parser   = new AngleSharp.Parser.Html.HtmlParser();
                    var document = parser.Parse(html);

                    var table = document.QuerySelector("table");
                    // TODO: locate the elements that are not needed and remove them.

                    // 4. TODO: remove two rows from the HTML (competition group and number of places).
                }
            }
        }
        /// <summary>
        /// Downloads the listing page at <paramref name="url"/> and maps every <c>li</c>
        /// below <c>ul.screening_left_ul</c> to a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="url">Address of the listing page.</param>
        /// <returns>A lazily evaluated sequence with one <see cref="HouseInfo"/> per list item.</returns>
        private IEnumerable <HouseInfo> GetRoomList(string url)
        {
            var html      = HTTPHelper.GetHTMLByURL(url);
            var document  = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
            var listItems = document.QuerySelector("ul.screening_left_ul").QuerySelectorAll("li");

            return(listItems.Select(li =>
            {
                var timeText  = li.QuerySelector("p.screening_time").TextContent;
                var priceText = li.QuerySelector("h5").TextContent;
                var anchor    = li.QuerySelector("a");

                // Only the part before the first comma is used; everything up to and
                // including the "租" character is cut off.
                var firstPart = anchor.TextContent.Split(',').FirstOrDefault();
                var place     = firstPart.Remove(0, firstPart.IndexOf("租") + 1);

                // Strip the currency decorations; an unparsable price stays 0.
                decimal price = 0;
                decimal.TryParse(priceText.Replace("¥", "").Replace("元/月", ""), out price);

                // Price in thousands selects the marker type, capped at Black.
                decimal thousands = price / 1000;
                LocationMarkBGType marker;
                if (thousands > (int)LocationMarkBGType.Black)
                {
                    marker = LocationMarkBGType.Black;
                }
                else
                {
                    marker = (LocationMarkBGType)thousands;
                }

                return new HouseInfo
                {
                    Money = priceText,
                    HouseURL = "http://www.huzhumaifang.com" + anchor.GetAttribute("href"),
                    HouseLocation = place,
                    HouseTime = timeText,
                    HousePrice = price,
                    LocationMarkBG = marker.ToString() + ".png",
                };
            }));
        }
Esempio n. 5
0
        /// <summary>
        /// Downloads a remote HTML document and parses it.
        /// </summary>
        /// <param name="uri">Address of the document to download.</param>
        /// <returns>
        /// The parsed document, or <c>null</c> when downloading or parsing throws
        /// (the failure is reported through <c>LogEvent</c>).
        /// </returns>
        private IDocument GetHtmlDocument(string uri)
        {
            try
            {
                string html;

                // Download the HTML document from the remote URI.
                using (WebClient client = new WebClient())
                {
                    // Add request headers so the request looks like it comes from a browser.
                    client.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36");
                    client.Headers.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
                    html = client.DownloadString(uri);
                }

                // Parse the HTML and return the resulting document.
                var parser = new AngleSharp.Parser.Html.HtmlParser();
                return(parser.Parse(html));
            }
            catch (Exception ex)
            {
                // Best effort: callers receive null, but the actual cause is logged so
                // network or parse errors are not mistaken for a missing document.
                LogEvent("File " + uri + " not found or document is empty. " + ex.Message);
                return(null);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Extracts the article summaries from a listing page.
        /// </summary>
        /// <param name="html">Markup of the listing page.</param>
        /// <param name="urlPage">Address of the page; passed to <c>GetAbsUrl</c> for the image and article links.</param>
        /// <returns>
        /// One <see cref="Article"/> per <c>article</c> element that carries both the
        /// <c>data-acturowid</c> and <c>data-datepubli</c> attributes.
        /// </returns>
        public static List <Article> ParseArticleList(string html, string urlPage)
        {
            var result = new List <Article>();

            // Some encodings are occasionally not detected; reloading a few times works around it.
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
            var nodes    = document.QuerySelectorAll("article[data-acturowid][data-datepubli]");

            foreach (var node in nodes)
            {
                string publishedOn  = node.Attributes["data-datepubli"].Value;
                var    illustration = node.QuerySelectorAll("img[class=ded-image]").First().Attributes["data-frz-src"].GetAbsUrl(urlPage);
                var    link         = node.QuerySelectorAll("h1 > a[href]").First();
                // The first two characters of the subtitle text are skipped.
                var    tagline      = node.QuerySelectorAll("span[class=soustitre]").First().TextContent.Substring(2);
                var    commentsNode = node.QuerySelectorAll("span[class=nb_comments]").FirstOrDefault();

                // A missing or non-numeric counter yields 0.
                int.TryParse(commentsNode?.TextContent, out int totalComments);

                var article = new Article
                {
                    Id = node.Attributes["data-acturowid"].Value,
                    PublicationTimeStamp = ConvertToTimeStamp(publishedOn, Constants.FORMAT_DATE_ARTICLE),
                    UrlIllustration      = illustration,
                    Url                = link.Attributes["href"].GetAbsUrl(urlPage),
                    Title              = link.TextContent,
                    SubTitle           = tagline,
                    TotalCommentsCount = totalComments,
                    HasSubscription    = node.QuerySelectorAll("img[alt=badge_abonne]").Any(),
                };
                result.Add(article);
            }

            return(result);
        }
        /// <summary>
        /// Reads the page count from the text of the <c>a.end</c> link of the index page.
        /// </summary>
        /// <param name="indexURL">Address of the index page.</param>
        /// <returns>The number found in the link text, or 0 when the link is absent.</returns>
        private int GetPageCount(string indexURL)
        {
            var html     = HTTPHelper.GetHTMLByURL(indexURL);
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

            var    lastPageLink = document.QuerySelector("a.end");
            string pageText     = lastPageLink?.TextContent ?? "0";

            return(Convert.ToInt32(pageText));
        }
Esempio n. 8
0
        /// <summary>
        /// Downloads the spys.one proxy list for <paramref name="country"/> and extracts
        /// the listed proxies.
        /// </summary>
        /// <param name="country">Country whose <c>abb</c> value is inserted into the list URL.</param>
        /// <returns>A collection with one <see cref="Proxy"/> per matched list row.</returns>
        public ProxyCollection GetProxyByCountry(Country country)
        {
            string pageUrl = String.Format("http://spys.one/free-proxy-list/{0}/", country.abb);
            string body    = client.DownloadString(pageUrl);

            HtmlDocument agilityDocument = new HtmlDocument();
            agilityDocument.LoadHtml(body);

            JavaScriptEngine engine = new JavaScriptEngine();
            var scriptContext       = new AngleSharp.Parser.Html.HtmlParser().Parse(body);

            // Run the page's body script first; presumably it defines the variables the
            // per-row port expressions refer to — TODO confirm.
            engine.EvaluateScript(scriptContext, agilityDocument.DocumentNode.SelectSingleNode(@"/html/body/script").InnerText);

            var proxies = new ProxyCollection();
            MatchCollection rows = Regex.Matches(body, @"((\d+)\.(\d+)\.(\d+)\.(\d+))(<script(.*?)</script>)(.*?)(HTTP</font></a>|HTTP</font><font|SOCKS5)");

            foreach (Match row in rows)
            {
                var entry = new Proxy();

                // The port is the result of evaluating the expression embedded in the row's script.
                string portExpression = Regex.Match(row.Value, @"(?<=\+)(.*?)(?=\)</script)").Value;
                string portText       = (string)engine.EvaluateScript(scriptContext, "\"\"+" + portExpression);
                string address        = Regex.Match(row.Value, @"(\d+)\.(\d+)\.(\d+)\.(\d+)").Value;

                entry.ip   = address;
                entry.port = portText;

                // Group 9 holds the markup fragment that identifies the protocol.
                string marker = row.Groups[9].Value;
                if (marker == @"HTTP</font></a>")
                {
                    entry.protocol = "http";
                }
                else if (marker == @"HTTP</font><font")
                {
                    entry.protocol = "https";
                }
                else if (marker == @"SOCKS5")
                {
                    entry.protocol = "socks5";
                }
                else
                {
                    entry.protocol = "unknown";
                }

                proxies.Add(entry);
            }

            return(proxies);
        }
Esempio n. 9
0
        /// <summary>
        /// Parses <paramref name="source"/> (re-using the cached document when the source
        /// is unchanged) and collects every element matching one of <c>Settings.Tags</c>.
        /// </summary>
        /// <param name="source">HTML markup to analyse.</param>
        /// <returns>A <see cref="Parsing"/> holding the collected tags and the UTC time of the run.</returns>
        public async Task <Parsing> ParseAsync(string source)
        {
            // Re-parse only when nothing is cached yet or the markup differs from the cached one.
            bool mustReparse = string.IsNullOrWhiteSpace(_source) || !Equals(_source, source);
            if (mustReparse)
            {
                _source = source;
                var domParser = new AngleSharp.Parser.Html.HtmlParser();
                _document = await domParser.ParseAsync(source);
            }

            var parsing   = new Parsing();
            var collected = new List <Tag>();

            // The selector queries run on a worker thread.
            await Task.Run(() =>
            {
                foreach (string selector in Settings.Tags)
                {
                    IHtmlCollection <IElement> matches = _document?.QuerySelectorAll(selector);
                    foreach (IElement match in matches)
                    {
                        var tag = new Tag()
                        {
                            Name       = match.TagName.ToLower(),
                            Attributes = ParseTag(match),
                            Parsing    = parsing
                        };
                        collected.Add(tag);
                    }
                }

                parsing.Date = DateTime.UtcNow;
                parsing.Tags = collected;
            });

            return(parsing);
        }
Esempio n. 10
0
        /// <summary>
        /// Parses <paramref name="_text"/> as HTML and yields a single string: the
        /// concatenated text content of all elements matching <paramref name="_Selector"/>.
        /// </summary>
        /// <param name="_text">HTML markup to parse.</param>
        /// <param name="_Selector">CSS selector of the elements whose text is collected.</param>
        /// <returns>An enumerator producing exactly one item, the concatenated text.</returns>
        public static IEnumerator TextContent(string _text, string _Selector)
        {
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(_text);
            var buffer   = new System.Text.StringBuilder();

            foreach (var node in document.QuerySelectorAll(_Selector))
            {
                string content = node.TextContent;
                if (content != null)
                {
                    buffer.Append(content);
                }
            }

            yield return(buffer.ToString());
        }
Esempio n. 11
0
        /// <summary>
        /// Posts a schedule query to <c>UrlShedule</c> and converts every table of the
        /// response into a list of <see cref="Shedule"/> rows.
        /// </summary>
        /// <param name="faculty">Value sent as the <c>faculty</c> field.</param>
        /// <param name="teacher">Value sent as the <c>teacher</c> field.</param>
        /// <param name="group">Value sent as the <c>group</c> field.</param>
        /// <param name="sdate">Value sent as the <c>sdate</c> field.</param>
        /// <param name="edate">Value sent as the <c>edate</c> field.</param>
        /// <returns>The rows of each table, keyed by the zero-based index of the table in the response.</returns>
        public Dictionary <int, List <Shedule> > GetShedule(string faculty, string teacher, string group, string sdate, string edate)
        {
            // NOTE(review): the space in " &group=" becomes part of the teacher value — confirm it is intended.
            string request = string.Concat(
                "faculty=", faculty,
                "&teacher=", teacher,
                " &group=", group,
                "&sdate=", sdate) +
                             string.Concat("&edate=", edate, "&n=700");

            var byTable = new Dictionary <int, List <Shedule> >();

            // Code page 1251 is only available once the code-pages provider is registered.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            wc.Encoding = Encoding.GetEncoding(1251);

            string        responseHtml = wc.UploadString(UrlShedule, request);
            IHtmlDocument document     = new AngleSharp.Parser.Html.HtmlParser().Parse(responseHtml);
            var           tables       = document.QuerySelectorAll("table");

            int tableIndex = 0;
            foreach (var table in tables)
            {
                var rows = new List <Shedule>();

                // Rows are the children of the table's first child.
                foreach (var row in table.Children[0].Children)
                {
                    // Second cell: time (line breaks replaced by dashes); third cell: subject.
                    string time    = row.Children[1].InnerHtml.Replace("<br>", "-");
                    string subject = row.Children[2].InnerHtml;

                    rows.Add(new Shedule()
                    {
                        time = time, subject = subject
                    });
                }

                byTable.Add(tableIndex, rows);
                tableIndex++;
            }

            return(byTable);
        }
Esempio n. 12
0
        /// <summary>
        /// Obtains the markup (from the loader when <c>isUrl</c> is set, otherwise from
        /// <c>Source</c>), parses it, runs the configured parser over the document and
        /// raises <c>ParsingCompleted</c> with the result.
        /// </summary>
        public async Task Parse()
        {
            string markup = isUrl ? await _loader.GetPage() : Source;

            Console.WriteLine("loader");
            var document = await new AngleSharp.Parser.Html.HtmlParser().ParseAsync(markup);

            Console.WriteLine("parser");
            _result = await _parser.Parse(document);

            // Notify subscribers, if any.
            var handler = ParsingCompleted;
            if (handler != null)
            {
                handler.Invoke(this, new ParseResult <Dictionary <string, List <StringDictionary> > >()
                {
                    Res = _result
                });
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Downloads the page at <c>url</c> and fills a <see cref="Video"/> from its
        /// Open Graph meta tags and the <c>eow-description</c> paragraph.
        /// </summary>
        /// <returns>The populated <see cref="Video"/>.</returns>
        /// <exception cref="Exception">The server answered with a non-success status code.</exception>
        public async Task <Video> parse()
        {
            string markup;

            using (var handler = new HttpClientHandler())
            using (var http = new HttpClient(handler))
            {
                var reply = await http.GetAsync(url);
                if (!reply.IsSuccessStatusCode)
                {
                    throw new Exception(reply.ReasonPhrase);
                }

                markup = await reply.Content.ReadAsStringAsync();
            }

            var result = new Video();
            var page   = new AngleSharp.Parser.Html.HtmlParser().Parse(markup);

            // Reads the "content" attribute of the first element matching the selector.
            Func <string, string> metaContent =
                selector => page.QuerySelectorAll(selector).First().GetAttribute("content");

            result.title        = metaContent("meta[property='og:title']");
            result.thumbnailUrl = metaContent("meta[property='og:image']");
            result.description  = page.QuerySelectorAll("p[id='eow-description']").First().TextContent;

            return(result);
        }
Esempio n. 14
0
        /// <summary>
        /// Collects the assets referenced by an HTML page together with the page itself,
        /// rewriting asset URIs inside the CSS files and the page to their new names.
        /// </summary>
        /// <param name="html">Markup of the page; asset URIs in it are replaced before it is stored.</param>
        /// <param name="pagePath">Path of the page; used when resolving assets and as the <c>Uri</c> of the page entry.</param>
        /// <param name="htmlName">Value stored as <c>NewUri</c> of the page entry.</param>
        /// <returns>The collected asset contents; the page itself is the last entry.</returns>
        public List <AssetContent> GetAssetsContents(string html, string pagePath, string htmlName)
        {
            var assetsContents = new List <AssetContent>();
            var parser         = new AngleSharp.Parser.Html.HtmlParser();
            var doc            = parser.Parse(html);

            // Split the page's assets into stylesheets and everything else.
            var htmlAssets   = GetHtmlAssets(doc);
            var nonCssAssets = htmlAssets.Where(a => a.Suffix.ToLower() != "css").ToList();

            // Presumably appends the content of the non-CSS assets to assetsContents — verify against the helper.
            AddBinaryAssetsContents(assetsContents, nonCssAssets, pagePath);

            foreach (var asset in htmlAssets.Where(a => a.Suffix.ToLower() == "css"))
            {
                try
                {
                    var cssStringContent = GetStringAsset(asset.Uri, mapPathResolver, webRoot, pagePath);
                    if (!string.IsNullOrEmpty(cssStringContent))
                    {
                        // Assets referenced from within the stylesheet are handled relative to the stylesheet's URI.
                        var cssAssets = GetCssAssets(cssStringContent);
                        AddBinaryAssetsContents(assetsContents, cssAssets, asset.Uri);
                        // Rewrite every known asset URI inside the stylesheet to its new name.
                        foreach (var assetContent in assetsContents)
                        {
                            // TODO: use regex to avoid replace uri that is not link but text
                            cssStringContent = cssStringContent.Replace(assetContent.Uri, assetContent.NewUri + "." + assetContent.Suffix);
                        }
                        var cssassetContent = Encoding.UTF8.GetBytes(cssStringContent);
                        // Store each stylesheet only once.
                        if (!assetsContents.Any(a => a.Uri == asset.Uri))
                        {
                            assetsContents.Add(new AssetContent
                            {
                                Uri     = asset.Uri,
                                NewUri  = asset.NewUri,
                                Suffix  = asset.Suffix,
                                Content = cssassetContent
                            });
                        }
                    }
                }
                // Any failure while processing one stylesheet is ignored and the loop continues with the next one.
                catch (Exception ex)
                { /* TODO: Trace somewhere */ }
            }
            // finally add index html
            foreach (var assetContent in assetsContents)
            {
                // TODO: use regex to avoid replace uri that is not link but text
                html = html.Replace(assetContent.Uri, assetContent.NewUri + "." + assetContent.Suffix);
            }
            // A doctype is prepended to the rewritten markup before it is encoded as UTF-8.
            html = "<!DOCTYPE html>" + html;
            var htmlContent = Encoding.UTF8.GetBytes(html);

            assetsContents.Add(new AssetContent
            {
                Uri     = pagePath,
                NewUri  = htmlName,
                Suffix  = "html",
                Content = htmlContent
            });
            return(assetsContents);
        }
        /// <summary>
        /// Converts one tweet element of a search result into a <see cref="GeneralPost"/>
        /// and appends it to <c>posts</c>.
        /// </summary>
        /// <param name="item">Stream item element; must carry a <c>data-item-id</c> attribute.</param>
        /// <param name="dict">Word list handed to <c>Cenzor.Cenz</c> together with the tweet text.</param>
        /// <remarks>
        /// Elements without a <c>class</c> attribute are skipped by all class filters.
        /// The publication date is best effort: it stays unset when the timestamp markup
        /// is missing or has an unexpected format.
        /// </remarks>
        private void PostSearch(IElement item, List <string> dict)
        {
            GeneralPost tweet = new GeneralPost();

            // Optional media: first image of the first media container, when present.
            var mediaContainer = item.QuerySelectorAll("div")
                                 .FirstOrDefault(k => ClassNameContains(k, "AdaptiveMediaOuterContainer"));
            if (mediaContainer != null)
            {
                var image = mediaContainer.QuerySelectorAll("img").FirstOrDefault();
                if (image != null)
                {
                    tweet.Image = image.Attributes["src"].Value;
                }
            }

            long id = long.Parse(item.Attributes["data-item-id"].Value);

            tweet.Text = item.QuerySelectorAll("p").First(k => ClassNameContains(k, "tweet-text")).InnerHtml;

            Cenzor cenzor = new Cenzor();

            tweet.Text = cenzor.Cenz(tweet.Text, dict);

            tweet.Social = SocialMedia.Twitter;

            // The first div whose class mentions "tweet" carries the author data attributes.
            var tweetDiv = item.QuerySelectorAll("div").First(k => ClassNameContains(k, "tweet"));
            tweet.AuthorName = tweetDiv.Attributes["data-name"].Value;
            string linkname = tweetDiv.Attributes["data-screen-name"].Value;

            tweet.PostLink     = "https://twitter.com/" + linkname + "/status/" + id;
            tweet.AuthorLink   = "https://twitter.com/" + linkname;
            tweet.AuthorAvatar = item.QuerySelectorAll("img").First(y => ClassNameContains(y, "avatar")).Attributes["src"].Value;

            try
            {
                var timestampLink = item.QuerySelectorAll("div").First(k => ClassNameContains(k, "content"))
                                    .QuerySelectorAll("div").First(o => ClassNameContains(o, "stream-item-header"))
                                    .QuerySelectorAll("small").First(u => ClassNameContains(u, "time"))
                                    .QuerySelectorAll("a").First(f => ClassNameContains(f, "tweet-timestamp"));

                // The title is split at '-' into a time part and a date part; the date part
                // is split at spaces into day, month name and year (indices 1..3).
                var title     = timestampLink.Attributes["title"].Value;
                var halves    = title.Split('-');
                var dateParts = halves[1].Split(' ');
                var timeParts = halves[0].TrimEnd(' ').Split(':');

                tweet.Date = new DateTime(
                    Int32.Parse(dateParts[3]),
                    getMonth(dateParts[2]),
                    Int32.Parse(dateParts[1]),
                    Int32.Parse(timeParts[0]),
                    Int32.Parse(timeParts[1]),
                    0);
            }
            catch
            {
                // Deliberately ignored: a tweet without a parsable date is still published.
            }

            // Replace every anchor in the tweet text by its inner markup (drops the link wrapper).
            AngleSharp.Parser.Html.HtmlParser parser       = new AngleSharp.Parser.Html.HtmlParser();
            AngleSharp.Dom.Html.IHtmlDocument htmldocument = parser.Parse(tweet.Text);

            foreach (var link in htmldocument.QuerySelectorAll("a"))
            {
                tweet.Text = tweet.Text.Replace(link.OuterHtml, link.InnerHtml);
            }

            lock (posts)
            {
                posts.Add(tweet);
            }
        }

        /// <summary>
        /// Tells whether the element has a class attribute whose value contains
        /// <paramref name="fragment"/>; elements without a class attribute never match.
        /// </summary>
        private static bool ClassNameContains(IElement element, string fragment)
        {
            var className = element.ClassName;
            return(className != null && className.Contains(fragment));
        }
Esempio n. 16
0
        /// <summary>
        /// Prepares the fixture: fresh mocks for the HTML and parser-config providers,
        /// a real AngleSharp parser, and the <see cref="ConcertProvider"/> built from them.
        /// </summary>
        public void SetUp()
        {
            // Collaborators first ...
            htmlProvider = new Mock <IHtmlProvider>();
            parserConfig = new Mock <IParserConfigProvider>();
            parser       = new AngleSharp.Parser.Html.HtmlParser();

            // ... then the object under test.
            provider = new ConcertProvider(parser, htmlProvider.Object, parserConfig.Object);
        }
Esempio n. 17
0
        /// <summary>
        /// Parses HTML read from <paramref name="stream"/> and binds it to models of type
        /// <typeparamref name="TModel"/> via <c>BindModelWithHtmlDocument</c>.
        /// </summary>
        /// <typeparam name="TModel">Model type to bind.</typeparam>
        /// <param name="stream">Stream containing the HTML.</param>
        /// <param name="postBindAction">Optional action forwarded to <c>BindModelWithHtmlDocument</c>.</param>
        /// <returns>The models produced by <c>BindModelWithHtmlDocument</c>.</returns>
        public IEnumerable <TModel> BindModelWithStream <TModel>(Stream stream, Action <TModel> postBindAction = null)
            where TModel : class, new()
        {
            IHtmlDocument parsed = new HtmlParser().Parse(stream);

            return(BindModelWithHtmlDocument <TModel>(parsed, postBindAction));
        }
Esempio n. 18
0
        /// <summary>
        /// Parses the HTML in <paramref name="htmlContent"/> and binds it to models of type
        /// <typeparamref name="TModel"/> via <c>BindModelWithHtmlDocument</c>.
        /// </summary>
        /// <typeparam name="TModel">Model type to bind.</typeparam>
        /// <param name="htmlContent">HTML markup to parse.</param>
        /// <param name="postBindAction">Optional action forwarded to <c>BindModelWithHtmlDocument</c>.</param>
        /// <returns>The models produced by <c>BindModelWithHtmlDocument</c>.</returns>
        public IEnumerable <TModel> BindModelHtmlContent <TModel>(string htmlContent, Action <TModel> postBindAction = null)
            where TModel : class, new()
        {
            IHtmlDocument parsed = new HtmlParser().Parse(htmlContent);

            return(BindModelWithHtmlDocument(parsed, postBindAction));
        }
Esempio n. 19
0
        /// <summary>
        /// Parses the lower-cased markup and returns the assets found by the
        /// document-based <c>GetHtmlAssets</c> overload.
        /// </summary>
        /// <param name="html">HTML markup; it is lower-cased (invariant culture) before parsing.</param>
        /// <returns>The assets reported for the parsed document.</returns>
        public List <Asset> GetHtmlAssets(string html)
        {
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html.ToLowerInvariant());
            List <Asset> found = GetHtmlAssets(document);

            return(found);
        }
Esempio n. 20
0
        /// <summary>
        /// Reads the comment counter from the first <c>span[class=actu_separator_comms]</c>
        /// element of the given markup.
        /// </summary>
        /// <param name="unContenu">HTML markup containing the counter.</param>
        /// <param name="urlPage">Not used by this method.</param>
        /// <returns>The number preceding the first space of the counter text, or 0 when it is not numeric.</returns>
        public static int GetCommentsCount(string unContenu, string urlPage)
        {
            var document    = new AngleSharp.Parser.Html.HtmlParser().Parse(unContenu);
            var counterText = document.QuerySelectorAll("span[class=actu_separator_comms]").First().TextContent;

            // Keep only the part in front of the first space.
            string leadingToken = counterText.JavaSubString(0, counterText.IndexOf(" ")).Trim();

            int count;
            int.TryParse(leadingToken, out count);
            return(count);
        }
Esempio n. 21
0
        /// <summary>
        /// Downloads the page addressed by <c>website.WebSiteUri</c> and parses it.
        /// </summary>
        /// <param name="website">Website whose <c>WebSiteUri</c> is downloaded.</param>
        /// <returns>The parsed HTML document.</returns>
        public static async Task <IHtmlDocument> GetHtmlByWebsite(Website website)
        {
            string download;

            // Dispose the client deterministically and download without blocking the caller.
            using (var client = new WebClient())
            {
                download = await client.DownloadStringTaskAsync(website.WebSiteUri);
            }

            var parser = new AngleSharp.Parser.Html.HtmlParser();
            var result = await parser.ParseAsync(download);

            Tools.DebugTrace(" Lib.GetHtmlByWebsite", Thread.CurrentThread.ManagedThreadId);

            return(result);
        }
        /// <summary>
        /// Registers the assets referenced by <paramref name="source"/> in
        /// <paramref name="assets"/> and loads their content.
        /// </summary>
        /// <param name="source">Content to scan: HTML markup or a stylesheet.</param>
        /// <param name="suffix">Kind of <paramref name="source"/>: a value starting with "htm", or "css" (case-insensitive).</param>
        /// <param name="pagePath">Path of the content; passed to the <c>Zipper</c> helpers when loading assets.</param>
        /// <param name="assets">Collection that receives the assets; the same instance is returned.</param>
        /// <returns><paramref name="assets"/>.</returns>
        private async Task <AssetPathCollection> ExtractAssets(string source, string suffix, string pagePath, AssetPathCollection assets)
        {
            if (suffix.StartsWith("htm", StringComparison.OrdinalIgnoreCase))
            {
                var parser = new AngleSharp.Parser.Html.HtmlParser();
                var doc    = parser.Parse(source);
                var images = doc.Images
                             .Where(x => x.HasAttribute("src"));
                // GetAttribute yields null for a missing attribute, so <link> elements
                // without "rel" are skipped.
                var styles = doc.GetElementsByTagName("link")
                             .Where(l => string.Equals((l.GetAttribute("rel") ?? string.Empty).Trim(), "stylesheet", StringComparison.OrdinalIgnoreCase))
                             .Where(c => c.HasAttribute("href"));
                var scripts = doc.GetElementsByTagName("script")
                              .Where(x => x.HasAttribute("src"));
                assets.AddSerializedAssets(images, "src");
                assets.AddSerializedAssets(scripts, "src");
                assets.AddSerializedAssets(styles, "href");

                // Stylesheets are loaded as text and scanned for the assets they reference.
                foreach (var asset in assets.Assets.Where(a => a.AssetType == "css"))
                {
                    var content = await Zipper.GetStringAsset(asset.OriginalPath, mapPathResolver, webRoot, pagePath);

                    var binaryContent = Encoding.UTF8.GetBytes(content);
                    asset.Content = binaryContent;
                    await ExtractAssets(content, "css", asset.OriginalPath, assets);
                }

                // Everything else is loaded as binary content.
                foreach (var asset in assets.Assets.Where(a => a.AssetType != "css"))
                {
                    var binaryContent = await Zipper.GetBinaryAsset(asset.OriginalPath, mapPathResolver, webRoot, pagePath);

                    asset.Content = binaryContent;
                }
            }
            else if (string.Equals(suffix, "css", StringComparison.OrdinalIgnoreCase))
            {
                var urls = Zipper.ExtaxtUrlsFromStyle(source);
                foreach (var url in urls)
                {
                    var csslocalPath = await Zipper.ReturnLocalPath(url);

                    // The generated name keeps the extension of the referenced file.
                    var csssuffix = csslocalPath.Split('.').Last();
                    var newUrl    = Guid.NewGuid().ToString().Replace("-", "") + "." + csssuffix;
                    source = source.Replace(url, newUrl);
                    // NOTE(review): the rewritten stylesheet text is not stored or returned,
                    // and the referenced files are not added to the collection yet.
                    // TODO: register the asset under newUrl and add its binary content.
                }
            }
            return(assets);
        }
Esempio n. 23
0
        /// <summary>
        /// Reads the HTML file at <paramref name="path"/> and parses it into a DOM using
        /// an AngleSharp configuration with CSS support.
        /// </summary>
        /// <param name="path">Path of the HTML file to read.</param>
        /// <returns>The parsed document.</returns>
        public static AngleSharp.Dom.Html.IHtmlDocument ReadAndParseHtmlFile(string path)
        {
            // Set up the AngleSharp HTML parser.
            var angleSharConfig = Configuration.Default.WithCss();
            var parser          = new AngleSharp.Parser.Html.HtmlParser(angleSharConfig);

            // Read the source file; the handle is released once parsing has finished,
            // even when parsing throws.
            using (FileStream fs = File.OpenRead(path))
            {
                // Parse it into a DOM with AngleSharp.
                return(parser.Parse(fs));
            }
        }
        /// <summary>
        /// Converts an HTML string to plain text.
        /// </summary>
        /// <param name="html">The HTML string to convert.</param>
        /// <returns>
        /// The text written by <c>ConvertContentToText</c> for the document's nodes,
        /// with leading and trailing white space removed.
        /// </returns>
        public string ToPlainText(string html)
        {
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

            using (var writer = new StringWriter())
            {
                ConvertContentToText(document.ChildNodes, writer);
                writer.Flush();

                // Trim surrounding white space.
                string plain = writer.ToString();
                return(plain.Trim());
            }
        }
        /// <summary>
        /// Zips a small page through <c>Zipper.ZipPage</c> and checks the archive: it has
        /// four entries, one of them a stylesheet, and the image reference inside
        /// <c>index.html</c> no longer points at the original path.
        /// </summary>
        public void Bundle()
        {
            var    page     = "<html><head><link href=\"/Content/css?v=peG2vCX8wlIEw2lPUnRL6uPAxina05CUT_UoTb_UXfw1\" rel=\"stylesheet\"/><script src=\"/bundles/modernizr?v=wBEWDufH_8Md-Pbioxomt90vm6tJN2Pyy9u9zHtWsPo1\"></script></head>Hello <img src=\"Content/test.png\" /></html>";
            string baseDir  = AppDomain.CurrentDomain.BaseDirectory;
            var    resolver = new Mock <IMapPathResolver>();

            // Map every path the page and its stylesheet may ask for to a file below the base directory.
            resolver.Setup(r => r.MapPath("", "Content/test.png"))
            .Returns(Path.Combine(baseDir, "Content", "test.png"));
            resolver.Setup(r => r.MapPath("", "../fonts/glyphicons-halflings-regular.eot"))
            .Returns(Path.Combine(baseDir, "fonts", "glyphicons-halflings-regular.eot"));
            resolver.Setup(r => r.MapPath("", "../fonts/glyphicons-halflings-regular.svg"))
            .Returns(Path.Combine(baseDir, "fonts", "glyphicons-halflings-regular.svg"));
            resolver.Setup(r => r.MapPath("", "../fonts/glyphicons-halflings-regular.ttf"))
            .Returns(Path.Combine(baseDir, "fonts", "glyphicons-halflings-regular.ttf"));
            resolver.Setup(r => r.MapPath("", "../fonts/glyphicons-halflings-regular.woff"))
            .Returns(Path.Combine(baseDir, "fonts", "glyphicons-halflings-regular.woff"));
            resolver.Setup(r => r.MapPath("", "../../images/cheap_diagonal_fabric.png"))
            .Returns(Path.Combine(baseDir, "Content", "test.png"));

            byte[] archiveBytes = Zipper.ZipPage(page, resolver.Object, "http://localhost:57399", "");

            var archiveStream = new MemoryStream(archiveBytes);

            using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read))
            {
                Assert.Equal(4, archive.Entries.Count);
                Assert.True(archive.Entries.Any(e => e.Name.EndsWith(".css")), "no css file");

                foreach (var entry in archive.Entries)
                {
                    using (var entryStream = entry.Open())
                    {
                        // Only the rewritten page is inspected; other entries are just opened.
                        if (entry.Name.ToLower() == "index.html")
                        {
                            var reader = new StreamReader(entryStream);
                            var markup = reader.ReadToEnd();
                            var dom    = new AngleSharp.Parser.Html.HtmlParser().Parse(markup);

                            Assert.Equal(1, dom.Images.Count());
                            Assert.NotEqual("Content/test.png", dom.Images[0].Attributes["src"].Value);
                        }
                    }
                }
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Rebuilds <c>article</c> as the outer HTML of the last anchor found in <c>text</c>,
        /// followed by a line break and the value returned by <c>GetLsi()</c>.
        /// </summary>
        /// <returns>The new value of <c>article</c>.</returns>
        private string getSmall()
        {
            article = GetLsi();

            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(text);

            // Each iteration overwrites the previous value, so only the last anchor
            // survives; the string stays empty when there are no anchors.
            var lastAnchorHtml = string.Empty;
            foreach (var anchor in document.QuerySelectorAll("a"))
            {
                lastAnchorHtml = anchor.OuterHtml;
            }

            article = lastAnchorHtml + Environment.NewLine + article;
            return article;
        }
Esempio n. 27
0
        /// <summary>
        /// Parses <paramref name="_text"/> as HTML and lazily yields the value of
        /// <paramref name="_Attribute"/> for every element matching the CSS selector
        /// <paramref name="_Selector"/> that carries that attribute.
        /// </summary>
        /// <param name="_text">HTML markup to parse.</param>
        /// <param name="_Selector">CSS selector choosing the elements to inspect.</param>
        /// <param name="_Attribute">Name of the attribute whose value is returned.</param>
        /// <returns>
        /// An enumerator over the attribute values (strings), in the order the matching
        /// elements are returned by the selector query. Elements without the attribute are skipped.
        /// </returns>
        public static IEnumerator GetAttribute(string _text, string _Selector, string _Attribute)
        {
            var parser   = new AngleSharp.Parser.Html.HtmlParser();
            var document = parser.Parse(_text);

            foreach (var element in document.QuerySelectorAll(_Selector))
            {
                // Yield the attribute that was asked for. The previous code tested
                // _Attribute for presence but always returned the "href" value.
                var value = element.GetAttribute(_Attribute);
                if (value != null)
                {
                    yield return value;
                }
            }
        }
        /// <summary>
        /// Downloads the schedule page at <paramref name="source"/> and builds one
        /// <see cref="FilmSession"/> for every "#afishaItem" element found in the sibling
        /// that follows each "#afishaKtName" element.
        /// </summary>
        /// <param name="source">Address of the page to download (read as UTF-8).</param>
        /// <param name="date">Date text that is combined with the show time and passed to <c>DateTime.Parse</c>.</param>
        /// <returns>The sessions found on the page, in document order.</returns>
        private IEnumerable <FilmSession> GetDataFromSource(string source, string date)
        {
            var filmSessionList = new List <FilmSession>();

            // Films already parsed during this call, keyed by their details link, so a
            // film that appears in several sessions is parsed only once.
            var existingFilmList = new Dictionary <string, Film>();

            var parser = new AngleSharp.Parser.Html.HtmlParser();

            string pageHtml;
            using (var webClient = new WebClient {
                Encoding = System.Text.Encoding.UTF8
            })
            {
                pageHtml = webClient.DownloadString(source);
            }
            var document = parser.Parse(pageHtml);

            foreach (var item in document.QuerySelectorAll("#afishaKtName"))
            {
                foreach (var subItem in item.NextElementSibling.QuerySelectorAll("#afishaItem"))
                {
                    var filmSession     = new FilmSession();
                    var filmDetailsLink = subItem.QuerySelector(".filmName a").GetAttribute("href").Trim();

                    Film film;
                    if (!existingFilmList.TryGetValue(filmDetailsLink, out film))
                    {
                        // Parsed on the calling thread: the former start-then-join thread added no
                        // concurrency, and an exception on it would have terminated the process
                        // instead of reaching the caller.
                        film = ParseFilmDetails(filmDetailsLink);
                        existingFilmList[filmDetailsLink] = film;
                    }
                    filmSession.Film = film;

                    filmSession.Cinema = new Cinema()
                    {
                        Name = Regex.Replace(item.QuerySelector("a").TextContent.Trim(), "[\".]", "")
                    };
                    filmSession.HallName = Regex.Replace(subItem.QuerySelector(".filmZal").TextContent.Trim(), "[\".]", "");

                    // When no show time link is present the session falls back to 14:00.
                    AngleSharp.Dom.IElement el = subItem.QuerySelector(".filmShows .time a");
                    filmSession.DateTime = DateTime.Parse(
                        (el == null) ? date + " 14:00" : date + " " + el.TextContent.Trim());

                    filmSession.Price      = subItem.QuerySelector(".filmPrices").TextContent.Trim();
                    filmSession.SeatIsFree = seatIsFree;
                    filmSessionList.Add(filmSession);
                }
            }
            return(filmSessionList);
        }
Esempio n. 29
0
        /// <summary>
        /// Looks up <paramref name="kata"/> at <c>BaseURL</c> and returns the text of the
        /// first element matching <c>Selector</c>, or a fixed "not found" message.
        /// </summary>
        private static string GetSynonyms(string kata)
        {
            // Download the page for the word as an HTML string, using a client built by CreateClient.
            var html = CreateClient().DownloadString(BaseURL + kata);

            // Parse the response with AngleSharp's HtmlParser.
            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

            // Pick the element with the selector and read its text content, if there is one.
            var match = document.QuerySelector(Selector);
            string synonyms = null;
            if (match != null)
            {
                synonyms = match.TextContent;
            }

            if (synonyms == null)
            {
                return "Maaf, sinonim tidak ditemukan";
            }
            return synonyms;
        }
Esempio n. 30
0
        /// <summary>
        /// Click handler: builds <c>article</c> from <c>text</c> and <c>GetLsi()</c>, copies it to the
        /// clipboard, shows it in the browser control, and fills the name/url boxes from its anchors.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            readFile();

            // Start from the current text, then append a line break and the GetLsi() result.
            article  = text;
            article += Environment.NewLine + GetLsi();

            Clipboard.SetText(article);
            webBrowser1.DocumentText = article;

            var document = new AngleSharp.Parser.Html.HtmlParser().Parse(article);

            // Every anchor overwrites both boxes, so they end up showing the last anchor.
            foreach (var link in document.QuerySelectorAll("a"))
            {
                wName.Text = link.InnerHtml;
                wUrl.Text  = link.GetAttribute("href");
            }
        }
Esempio n. 31
0
        /// <summary>
        /// Loads the supplied configuration, posts the username and password to the login URL and
        /// treats the login as successful when the response body contains "/logout.php".
        /// </summary>
        /// <param name="configJson">Configuration values to load into <c>configData</c>.</param>
        /// <returns><c>IndexerConfigurationStatus.RequiresTesting</c> once the configuration has been applied.</returns>
        /// <exception cref="ExceptionWithConfigData">
        /// Thrown by the error callback, carrying the text of the ".error_text" element of the
        /// response, or a generic message when the response has no such text.
        /// </exception>
        public async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
        {
            configData.LoadValuesFromJson(configJson);
            var pairs = new Dictionary<string, string> {
                { "username", configData.Username.Value },
                { "password", configData.Password.Value }
            };

            // Get initial cookies
            CookieHeader = string.Empty;
            var response = await RequestLoginAndFollowRedirect(LoginPostUrl, pairs, CookieHeader, true, null, LoginUrl);

            await ConfigureIfOK(response.Cookies, response.Content != null && response.Content.Contains("/logout.php"), () =>
            {
                string errorMessage = null;

                // The callback also runs when the response has no body at all, so only
                // parse when there is something to parse.
                if (!string.IsNullOrEmpty(response.Content))
                {
                    var parser = new AngleSharp.Parser.Html.HtmlParser();
                    var document = parser.Parse(response.Content);
                    var messageEl = document.QuerySelector(".error_text");

                    // The page may not contain an ".error_text" element; dereferencing it
                    // unconditionally would replace the login error with a NullReferenceException.
                    if (messageEl != null)
                    {
                        errorMessage = messageEl.TextContent.Trim();
                    }
                }

                if (string.IsNullOrEmpty(errorMessage))
                {
                    errorMessage = "Login failed: the site did not return an error message.";
                }

                throw new ExceptionWithConfigData(errorMessage, configData);
            });
            return IndexerConfigurationStatus.RequiresTesting;
        }
Esempio n. 32
0
        /// <summary>
        /// Parses a torrent listing page into a list of releases, one per row of the
        /// "#highlight" table except its first row.
        /// </summary>
        /// <param name="htmlResponse">HTML of the listing page.</param>
        /// <returns>
        /// The releases parsed so far. If any row fails to parse, the error is reported through
        /// <c>OnParseError</c> and the rows parsed before the failure are returned.
        /// </returns>
        private List<ReleaseInfo> ParseResponse(string htmlResponse)
        {
            // Custom time zone used to interpret the listed dates: UTC+2 base offset with a one hour
            // daylight shift from the last Sunday of March 03:00 to the last Sunday of October 04:00.
            TimeZoneInfo.TransitionTime startTransition = TimeZoneInfo.TransitionTime.CreateFloatingDateRule(new DateTime(1, 1, 1, 3, 0, 0), 3, 5, DayOfWeek.Sunday);
            TimeZoneInfo.TransitionTime endTransition = TimeZoneInfo.TransitionTime.CreateFloatingDateRule(new DateTime(1, 1, 1, 4, 0, 0), 10, 5, DayOfWeek.Sunday);
            TimeSpan delta = new TimeSpan(1, 0, 0);
            TimeZoneInfo.AdjustmentRule adjustment = TimeZoneInfo.AdjustmentRule.CreateAdjustmentRule(new DateTime(1999, 10, 1), DateTime.MaxValue.Date, delta, startTransition, endTransition);
            TimeZoneInfo.AdjustmentRule[] adjustments = { adjustment };
            TimeZoneInfo romaniaTz = TimeZoneInfo.CreateCustomTimeZone("Romania Time", new TimeSpan(2, 0, 0), "(GMT+02:00) Romania Time", "Romania Time", "Romania Daylight Time", adjustments);

            List<ReleaseInfo> releases = new List<ReleaseInfo>();

            try
            {
                var parser = new AngleSharp.Parser.Html.HtmlParser();
                var document = parser.Parse(htmlResponse);
                var rows = document.QuerySelectorAll("#highlight > tbody > tr:not(:First-child)");

                // "Today"/"Yesterday" rows are converted below as Romania local time, so the
                // calendar date they refer to must also be taken in that zone. Using the UTC date
                // put such rows a day early during the first hours after Romanian midnight.
                // ConvertTimeFromUtc returns an Unspecified-kind value for a custom zone, which is
                // what ConvertTimeToUtc below expects.
                DateTime todayRomania = TimeZoneInfo.ConvertTimeFromUtc(DateTime.UtcNow, romaniaTz).Date;

                foreach (var row in rows)
                {
                    var release = new ReleaseInfo();

                    var linkNameElement = row.QuerySelector("a.torrent_name_link");

                    release.Title = linkNameElement.GetAttribute("title");
                    release.Description = release.Title;
                    release.Guid = new Uri(SiteLink + linkNameElement.GetAttribute("href"));
                    release.Comments = release.Guid;
                    release.Link = new Uri(SiteLink + row.QuerySelector("td.table_links > a").GetAttribute("href"));
                    release.Category = TvCategoryParser.ParseTvShowQuality(release.Title);
                    release.Seeders = ParseUtil.CoerceInt(row.QuerySelector("td.table_seeders").TextContent.Trim());
                    // Peers is the sum of the leechers and seeders columns.
                    release.Peers = ParseUtil.CoerceInt(row.QuerySelector("td.table_leechers").TextContent.Trim()) + release.Seeders;
                    release.Size = ReleaseInfo.GetBytes(row.QuerySelector("td.table_size").TextContent);
                    release.MinimumRatio = 1;
                    // NOTE(review): 172800 looks like 48 hours expressed in seconds - confirm the unit.
                    release.MinimumSeedTime = 172800;

                    // The "added" column is either "Today HH:mm:ss", "Yesterday HH:mm:ss" or a full date.
                    DateTime pubDateRomania;
                    var dateString = row.QuerySelector("td.table_added").TextContent.Trim();
                    if (dateString.StartsWith("Today ", StringComparison.Ordinal))
                    {
                        pubDateRomania = todayRomania + TimeSpan.Parse(dateString.Split(' ')[1], CultureInfo.InvariantCulture);
                    }
                    else if (dateString.StartsWith("Yesterday ", StringComparison.Ordinal))
                    {
                        pubDateRomania = todayRomania + TimeSpan.Parse(dateString.Split(' ')[1], CultureInfo.InvariantCulture) - TimeSpan.FromDays(1);
                    }
                    else
                    {
                        pubDateRomania = DateTime.SpecifyKind(DateTime.ParseExact(dateString, "d-MMM-yyyy HH:mm:ss", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
                    }

                    DateTime pubDateUtc = TimeZoneInfo.ConvertTimeToUtc(pubDateRomania, romaniaTz);
                    release.PublishDate = pubDateUtc.ToLocalTime();

                    releases.Add(release);
                }
            }
            catch (Exception ex)
            {
                OnParseError(htmlResponse, ex);
            }

            return releases;
        }