コード例 #1
0
 /// <summary>
 /// Builds a copy of <c>Content</c>, replaces the text of each of its text nodes with the
 /// translation returned by the translate endpoint, stores the copy in
 /// <c>TranslatedContent</c> and returns it.
 /// </summary>
 /// <param name="targetLangCode">Language code sent as the <c>tl</c> query parameter.</param>
 /// <returns>An asynchronous operation that yields the translated copy of the content node.</returns>
 public IAsyncOperation <HtmlNode> TranslateAsync(string targetLangCode)
 {
     return(AsyncInfo.Run(async token =>
     {
         var node = HtmlNode.CreateNode(Content.OuterHtml);

         // The language code is caller-supplied, so escape it just like the query text.
         var target = Uri.EscapeDataString(targetLangCode ?? string.Empty);

         // Snapshot the text nodes first: the loop rewrites them, and Descendants()
         // is a lazy enumeration over the tree that is being modified.
         foreach (var item in node.Descendants("#text").ToList())
         {
             // One HTTP round trip per text node, so honour cancellation between requests.
             token.ThrowIfCancellationRequested();

             var data = item.GetInnerText();
             if (string.IsNullOrWhiteSpace(data))
             {
                 // Nothing to translate; skip the request entirely.
                 continue;
             }

             var uri = "https://translate.google.cn/translate_a/single?client=gtx&dt=t&ie=UTF-8&oe=UTF-8"
                       + $"&sl=auto&tl={target}&q={Uri.EscapeDataString(data)}";
             var transRetHtml = await transClient.GetStringAsync(new Uri(uri));

             // The first element of the response array holds the translated segments;
             // the first entry of each segment is its translated text.
             var obj = JsonConvert.DeserializeObject <JArray>(transRetHtml);
             var objarr = (JArray)obj[0];
             var translated = string.Concat(objarr.Select(a => a[0].ToString()));
             item.InnerHtml = HtmlEntity.Entitize(translated);
         }
         TranslatedContent = node;
         return node;
     }));
 }
コード例 #2
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Decodes HTML entities in <paramref name="Text"/> and passes the result through <c>CleanText</c>.
        /// </summary>
        /// <param name="Text">Text to clean; null or empty input is returned unchanged.</param>
        /// <returns>The cleaned text, or the input itself when it is null or empty.</returns>
        public static string CleanHtmlText(string Text)
        {
            if (string.IsNullOrEmpty(Text))
            {
                return(Text);
            }

            string CleanedText = Text;

            try
            {
                CleanedText = HtmlEntity.DeEntitize(CleanedText);
            }
            catch (Exception ex)
            {
                // Best effort: if decoding fails, continue with the undecoded text.
                // The log prefix names this method so the message can be traced to its source.
                DebugMsgStatic(string.Format("CleanHtmlText: {0}", ex.Message));
            }

            return(CleanText(Text: CleanedText));
        }
コード例 #3
0
        /// <summary>
        /// Builds a JSON-formatted REST request for <paramref name="resource"/>, executes it
        /// through <c>client</c> and returns the typed response.
        /// </summary>
        /// <typeparam name="T">Type the response payload is deserialized into.</typeparam>
        /// <param name="http_method">"GET", "POST" or "DELETE" (case-insensitive); any other value is sent as GET.</param>
        /// <param name="resource">Resource path assigned to the request.</param>
        /// <param name="data">Key/value pairs: serialized as the JSON body for POST, otherwise sent as query-string parameters.</param>
        /// <returns>The response obtained by executing the request.</returns>
        private IRestResponse <T> _request <T>(string http_method, string resource, dict data)
            where T : new()
        {
            var request = new RestRequest()
            {
                Resource = resource, RequestFormat = DataFormat.Json
            };

            // Resolve the HTTP method; unknown verbs fall back to GET.
            Method method;
            switch (http_method.ToUpperInvariant())
            {
            case "POST": method = Method.POST;
                break;

            case "DELETE": method = Method.DELETE;
                break;

            default: method = Method.GET;
                break;
            }
            request.Method = method;

            if (method == Method.POST)
            {
                // POST carries the data as a JSON body only, with no query-string parameters.
                request.AddParameter("application/json", request.JsonSerializer.Serialize(data), ParameterType.RequestBody);
            }
            else
            {
                foreach (KeyValuePair <string, string> kvp in data)
                {
                    request.AddParameter(kvp.Key, HtmlEntity.Convert(kvp.Value), ParameterType.QueryString);
                }
            }

            // Execute synchronously: this method hands the response straight back to the caller,
            // so the call must have completed before returning. A callback-based ExecuteAsync
            // would return before the callback has run.
            return(client.Execute <T>(request));
        }
コード例 #4
0
ファイル: FilterPolicy.cs プロジェクト: zm79287/web-security
        /// <summary>
        /// Checks whether an attribute value is valid for the given policy attribute.
        /// </summary>
        /// <param name="attr">Policy entry supplying the allowed literal values and allowed regular expressions.</param>
        /// <param name="value">Raw attribute value; it is trimmed and HTML-entity-decoded before matching.</param>
        /// <returns>
        /// <c>true</c> when the decoded value equals one of the allowed values (ignoring case)
        /// or matches one of the allowed patterns in full; otherwise <c>false</c>.
        /// </returns>
        public static bool ValidateAttribute(PolicyAttribute attr, string value)
        {
            if (attr == null || string.IsNullOrWhiteSpace(value))
            {
                return(false);
            }
            value = HtmlEntity.DeEntitize(value.Trim());

            // Check whether the value is one of the explicitly allowed values.
            if (attr.AllowedValues != null)
            {
                foreach (string allowedValue in attr.AllowedValues)
                {
                    if (allowedValue != null && allowedValue.Equals(value, StringComparison.OrdinalIgnoreCase))
                    {
                        return(true);
                    }
                }
            }

            // Check whether the value matches one of the allowed regular expressions.
            if (attr.AllowedRegExp != null)
            {
                foreach (string ptn in attr.AllowedRegExp)
                {
                    if (ptn == null)
                    {
                        // Skip missing entries, as is done for allowed values above.
                        continue;
                    }

                    // Anchor the whole pattern. The non-capturing group is required so that an
                    // alternation such as "a|b" is anchored as a unit; plain "^a|b$" would accept
                    // anything starting with "a" or ending with "b".
                    string pattern = "^(?:" + ptn + ")$";
                    if (Regex.IsMatch(value, pattern))
                    {
                        return(true);
                    }
                }
            }
            return(false);
        }
コード例 #5
0
        /// <summary>
        /// Queries the site's search page for a release name and yields one link per result row.
        /// </summary>
        /// <param name="query">The name of the release to search for.</param>
        /// <returns>Lazily produced sequence of the download links found; empty when no result cells exist.</returns>
        public override IEnumerable <Link> Search(string query)
        {
            var page = Utils.GetHTML(Site + "search.php?search=" + Utils.EncodeURL(query),
                                     request: req =>
            {
                req.Accept = "*/*";
                req.Headers[HttpRequestHeader.AcceptLanguage] = "en";
                req.AutomaticDecompression = DecompressionMethods.None;
            });

            // Each result row is addressed through its second cell.
            var cells = page.DocumentNode.SelectNodes("//table/tr/td[2]");

            if (cells != null)
            {
                foreach (var cell in cells)
                {
                    var result = new Link(this);

                    result.Release = HtmlEntity.DeEntitize(cell.InnerText).Trim();
                    result.Quality = FileNames.Parser.ParseQuality(result.Release);

                    // Size and extra info live in the 4th and 5th cells of the same row.
                    var sizeText = HtmlEntity.DeEntitize(cell.GetTextValue("../td[4]")).Trim();
                    result.Size  = sizeText.Replace("M", " MB");
                    result.Infos = HtmlEntity.DeEntitize(cell.GetTextValue("../td[5]")).Trim();

                    var tooltip = cell.GetAttributeValue("title");

                    if (tooltip.Contains("Nuked"))
                    {
                        // The nuke reason is the red-coloured text inside the decoded tooltip markup.
                        var reason = Regex.Match(HtmlEntity.DeEntitize(tooltip), "<font color='red'>([^<]+)").Groups[1];

                        if (reason.Success)
                        {
                            result.Infos += ", Nuked: " + reason.Value;
                        }
                    }

                    yield return(result);
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// Recursively renders <paramref name="nodes"/> as plain text into <paramref name="builder"/>.
        /// Text nodes are entity-decoded and passed to <c>Process</c>; "br" and non-inline elements
        /// introduce double line breaks; elements listed in <c>NonVisibleTags</c> are skipped.
        /// </summary>
        /// <param name="builder">Destination for the rendered text.</param>
        /// <param name="state">Current output state, updated as text and breaks are emitted.</param>
        /// <param name="nodes">Nodes to render, in order.</param>
        static void Plain(StringBuilder builder, ref ToPlainTextState state, IEnumerable <HtmlNode> nodes)
        {
            foreach (var current in nodes)
            {
                var textNode = current as HtmlTextNode;
                if (textNode != null)
                {
                    Process(builder, ref state, HtmlEntity.DeEntitize(textNode.Text).ToCharArray());
                    continue;
                }

                var name = current.Name.ToLower();

                if (name == "br")
                {
                    builder.AppendDoubleLine();
                    state = ToPlainTextState.StartLine;
                    continue;
                }

                if (NonVisibleTags.Contains(name))
                {
                    // Invisible content contributes nothing to the output.
                    continue;
                }

                // Anything that is not an inline tag is treated as a block: it is separated
                // from surrounding content by a break before and after its children.
                var isBlock = !InlineTags.Contains(name);

                if (isBlock && state != ToPlainTextState.StartLine)
                {
                    builder.AppendDoubleLine();
                    state = ToPlainTextState.StartLine;
                }

                Plain(builder, ref state, current.ChildNodes);

                if (isBlock && state != ToPlainTextState.StartLine)
                {
                    builder.AppendDoubleLine();
                    state = ToPlainTextState.StartLine;
                }
            }
        }
コード例 #7
0
        /// <summary>
        /// Reads the article list from an index page and locates the link to the next page.
        /// </summary>
        /// <param name="url">Address of the index page (not used by this implementation).</param>
        /// <param name="doc">Parsed index page.</param>
        /// <param name="nextPageUrl">Receives the URL of the next index page, or null when none is found.</param>
        /// <returns>
        /// The articles found, or null when the "maincontent" element or its list items are
        /// missing, or when no article could be read.
        /// </returns>
        public override ArticalOverview[] ReadIndexPage(string url, HtmlDocument doc, out string nextPageUrl)
        {
            nextPageUrl = null;

            var container = doc.GetElementbyId("maincontent");
            if (container == null) return null;

            var liTags = Helper.AllChild(container, "li");
            if (liTags == null) return null;

            List<ArticalOverview> data = new List<ArticalOverview>();
            foreach (var li in liTags)
            {
                var aLink = Helper.AnyChild(li, "a");
                if (aLink == null)
                {
                    // List item without a link: nothing to record.
                    continue;
                }

                data.Add(new ArticalOverview()
                {
                    LinkOfActualArtical = Helper.CombindUrl(webDir, aLink.GetAttributeValue("href", "")),
                    Title = HtmlEntity.DeEntitize(aLink.InnerText)
                });
            }

            // The pager is the centre-aligned cell: the current page number is in a <font>
            // element and the other pages are links whose text is their page number.
            var pageContainer = Helper.AnyChild(container, "td", new Dictionary<string, string>() { ["align"] = "center" });
            if (pageContainer != null)
            {
                var currentPageNode = Helper.AnyChild(pageContainer, "font");
                var allLinks = Helper.AllChild(pageContainer, "a");

                // Paging is optional: a missing or non-numeric pager simply leaves nextPageUrl null.
                int currentPageIndex;
                if (currentPageNode != null && allLinks != null &&
                    int.TryParse(currentPageNode.InnerText, out currentPageIndex))
                {
                    var nextPageLabel = (currentPageIndex + 1).ToString();

                    foreach (var link in allLinks)
                    {
                        if (link.InnerText == nextPageLabel)
                            nextPageUrl = Helper.CombindUrl(webDir, link.GetAttributeValue("href", ""));
                    }
                }
            }

            return data.Count > 0 ? data.ToArray() : null;
        }
コード例 #8
0
        /// <summary>
        /// Parses <paramref name="html"/> and returns the text collected by
        /// <c>GetTextFromNodes</c>, with HTML entities decoded.
        /// </summary>
        /// <param name="html">HTML markup; null or empty yields an empty string.</param>
        /// <returns>The entity-decoded text.</returns>
        private static string GetTextFromHtml(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return("");
            }

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(html);

            var sb = StringExtensions.StringBuilderPool.Rent();

            try
            {
                GetTextFromNodes(sb, htmlDoc.DocumentNode.ChildNodes);

                return(HtmlEntity.DeEntitize(sb.ToString()));
            }
            finally
            {
                // Hand the rented builder back even when extraction or decoding throws.
                StringExtensions.StringBuilderPool.Return(sb);
            }
        }
コード例 #9
0
ファイル: TentaclePage.cs プロジェクト: Teitoku42/Tentacle
        /// <summary>
        /// Returns the entity-decoded inner HTML of the first child of the "title" info node
        /// whose first attribute has the value "pretty".
        /// </summary>
        /// <returns>
        /// The decoded title, or an empty string when the title node is missing (a message box
        /// is shown in that case) or has no matching child.
        /// </returns>
        public string GetPrettyTitle()
        {
            // The result is not used here; the call is kept because it was made before the lookup.
            GetPageInfo();

            HtmlNode titleNode = FindInfoNode("title");

            if (titleNode != null)
            {
                foreach (var child in titleNode.ChildNodes)
                {
                    var attributes = child.Attributes;
                    if (attributes.Count == 0 || attributes[0].Value != "pretty")
                    {
                        continue;
                    }

                    return(HtmlEntity.DeEntitize(child.InnerHtml));
                }

                return("");
            }

            MessageBox.Show("Couldn't find title.");
            return("");
        }
コード例 #10
0
        /// <summary>
        /// Extracts the visible text of an HtmlAgilityPack document: every non-blank text node
        /// outside script and style elements is entity-decoded, trimmed and joined with spaces.
        /// </summary>
        /// <param name="htmlDoc">Document to read.</param>
        /// <returns>
        /// The collected text, each fragment followed by a single space; an empty string when
        /// the document contains no matching text nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlDoc"/> is null.</exception>
        private string ParseHtmlDocumentText(HtmlDocument htmlDoc)
        {
            if (htmlDoc == null)
            {
                throw new ArgumentNullException("htmlDoc");
            }

            var textNodes = htmlDoc.DocumentNode.SelectNodes("./descendant-or-self::*[not(self::script or self::style)]/text()[not(normalize-space(.)='')]");

            // SelectNodes yields null rather than an empty collection when nothing matches.
            if (textNodes == null)
            {
                return(string.Empty);
            }

            // A builder avoids re-copying the accumulated text for every node.
            var text = new System.Text.StringBuilder();

            foreach (HtmlNode node in textNodes)
            {
                if (!string.IsNullOrEmpty(node.InnerText.Trim()))
                {
                    var temp = HtmlEntity.DeEntitize(node.InnerText);
                    text.Append(temp.Trim()).Append(' ');
                }
            }

            return(text.ToString());
        }
コード例 #11
0
        /// <summary>
        /// Reads the "most played heroes" list from a player's page and maps each hero to its stats.
        /// </summary>
        /// <param name="playerId">Identifier used to load the player's page root.</param>
        /// <returns>Hero-to-stat map; empty when the page has no most-played-heroes entries.</returns>
        public Dictionary <IHero, IStat> FetchMostPlayedHeroes(string playerId)
        {
            HtmlNode root = mainController.HtmlDocumentController.GetDotabuffPlayerRoot(playerId);

            IEnumerable <HtmlNode> mostPlayedHeroesNode = root.SelectNodes(PlayerPath.MostPlayedHeroes.List.Value);

            Dictionary <IHero, IStat> mostPlayedHeros = new Dictionary <IHero, IStat>();

            // SelectNodes yields null when nothing matches; treat that as an empty list.
            if (mostPlayedHeroesNode == null)
            {
                return(mostPlayedHeros);
            }

            int heroCount = mostPlayedHeroesNode.Count();

            // The per-entry paths are built from a 1-based position.
            for (int i = 1; i <= heroCount; i++)
            {
                string heroReference =
                    HtmlEntity.DeEntitize(
                        root.SelectSingleNode(mainController.CombinePathWithListCount(PlayerPath.MostPlayedHeroes.Hero.Value, i))
                        .Attributes[HtmlAttributes.LastPlayedMatches.ReferenceAttribute.Value].Value).Replace(HtmlAttributes.Hero.Replace.Value, "");
                Hero hero = GetHero(heroReference);
                Stat stat = statController.MapHtmlNode(root, i);
                mostPlayedHeros.Add(hero, stat);
            }

            return(mostPlayedHeros);
        }
コード例 #12
0
        /// <summary>
        /// Concatenates the entity-decoded inner text of the siblings that follow
        /// <paramref name="startNode"/>, stopping at the first sibling whose outer HTML
        /// starts with <paramref name="htmlSeparator"/>.
        /// </summary>
        /// <param name="startNode">Node whose following siblings are read; the node itself is not included.</param>
        /// <param name="htmlSeparator">Markup prefix that marks the start of the next section.</param>
        /// <returns>The combined text, or an empty string when there is nothing to collect.</returns>
        public static string GetAllInnerText(HtmlNode startNode, string htmlSeparator)
        {
            string collected = string.Empty;

            for (var sibling = startNode.NextSibling; sibling != null; sibling = sibling.NextSibling)
            {
                // Reaching the separator means the next section has begun.
                if (sibling.OuterHtml.StartsWith(htmlSeparator))
                {
                    break;
                }

                collected += HtmlEntity.DeEntitize(sibling.InnerText);
            }

            return(collected);
        }
コード例 #13
0
        /// <summary>
        /// Extracts the CSRF token from a page: the "data-react-props" attribute of the
        /// Header div is entity-decoded and parsed as JSON, its "session" member is parsed
        /// as JSON again, and the "csrf.token" value of that object is returned.
        /// </summary>
        /// <param name="html">Markup of the page.</param>
        /// <returns>The token as a string.</returns>
        public string GetToken(string html)
        {
            var page = new HtmlDocument();
            page.LoadHtml(html);

            var headerDiv = page.DocumentNode.SelectSingleNode("//div[@data-react-class='Header']");

            // The props are stored entity-encoded inside the attribute value.
            var propsJson = HtmlEntity.DeEntitize(headerDiv.GetAttributeValue("data-react-props", ""));

            dynamic props = JObject.Parse(propsJson);

            // The session member is re-parsed from its string form.
            dynamic session = JObject.Parse(props.session.ToString());

            return(session.csrf.token.ToString());
        }
コード例 #14
0
ファイル: TTTHttp.cs プロジェクト: oddsve/TheGym
        /// <summary>
        /// Requests <paramref name="url"/> with the shared cookie container, entity-decodes
        /// the response body, runs <c>findErrors</c> on it and returns it.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The entity-decoded response body.</returns>
        public static string getHTTP(string url)
        {
            isError      = false;
            ErrorMessage = "";
            // Send the stored cookies along with the request for the protected page.
            HttpWebRequest webRequest = ( HttpWebRequest )WebRequest.Create(url);

            webRequest.ContentType     = "application/x-www-form-urlencoded";
            webRequest.CookieContainer = cookies;

            string responseData;

            // Dispose the response and reader even when reading or decoding throws,
            // so the underlying connection is always released.
            // NOTE(review): the body is decoded as UTF-7, which is unusual for web content;
            // left unchanged here, but confirm it matches what the server actually sends.
            using (WebResponse response = webRequest.GetResponse())
                using (StreamReader responseReader = new StreamReader(response.GetResponseStream(), Encoding.UTF7))
                {
                    responseData = HtmlEntity.DeEntitize(responseReader.ReadToEnd());
                }

            findErrors(responseData);

            return(responseData);
        }
コード例 #15
0
        /// <summary>
        /// Reads the most popular ability build from a hero page: for each ability row, the
        /// ability is looked up by its icon's alt text and the levels at which it is chosen are collected.
        /// </summary>
        /// <param name="heroReference">Reference used to load the hero page and its abilities.</param>
        /// <returns>One entry per ability row; empty when the page contains no build rows or icons.</returns>
        internal List <AbilityBuild> FetchMostPopularAbilityBuild(string heroReference)
        {
            HtmlNode root = HtmlDocumentController.GetDotabuffHeroRoot(heroReference);

            IList <HtmlNode> mostPopularAbilityBuildNodes     = root.SelectNodes(HeroPath.MostPopularAbilitBuild.Abilities.Value);
            IList <HtmlNode> mostPopularAbilityBuildIconNodes = root.SelectNodes(HeroPath.MostPopularAbilitBuild.Icons.Value);

            List <AbilityBuild> abilityBuildList = new List <AbilityBuild>();

            // SelectNodes yields null when nothing matches; there is no build to read then.
            if (mostPopularAbilityBuildNodes == null || mostPopularAbilityBuildIconNodes == null)
            {
                return(abilityBuildList);
            }

            List <Ability> abilities = FetchAbilities(heroReference);

            for (int i = 0; i < mostPopularAbilityBuildNodes.Count; i++)
            {
                AbilityBuild abilityBuild = new AbilityBuild();

                // The icon at the same position carries the ability name in its alt text.
                string abilityName = HtmlEntity.DeEntitize(mostPopularAbilityBuildIconNodes[i].Attributes["alt"].Value);

                abilityBuild.Ability = abilities.First(ability => ability.Name.Contains(abilityName));

                // Each "entry choice" div inside the row holds a level number.
                IEnumerable <HtmlNode> levelBuildNodes =
                    mostPopularAbilityBuildNodes[i].Descendants("div")
                    .Where(
                        d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("entry choice"));

                List <int> abilityLevelBuild = new List <int>();
                foreach (HtmlNode levelBuildNode in levelBuildNodes)
                {
                    abilityLevelBuild.Add(int.Parse(levelBuildNode.InnerText));
                }

                abilityBuild.LevelBuild = abilityLevelBuild;
                abilityBuildList.Add(abilityBuild);
            }

            return(abilityBuildList);
        }
コード例 #16
0
ファイル: Problem.cs プロジェクト: z93blom/adventofcode
        /// <summary>
        /// Builds a <c>Problem</c> from a puzzle page: every article element is converted to
        /// markdown via <c>UnparseList</c>, the answer following each article is collected,
        /// and the title is taken from the first h2 element.
        /// </summary>
        /// <param name="year">Puzzle year, stored as-is.</param>
        /// <param name="day">Puzzle day, stored as-is.</param>
        /// <param name="url">Page address, written into the markdown header line.</param>
        /// <param name="html">Markup of the puzzle page.</param>
        /// <param name="input">Puzzle input, stored as-is.</param>
        /// <returns>The populated problem.</returns>
        public static Problem Parse(int year, int day, string url, string html, string input)
        {
            var page = new HtmlDocument();
            page.LoadHtml(html);

            var markdown         = $"original source: [{url}]({url})\n";
            var collectedAnswers = "";

            foreach (var article in page.DocumentNode.SelectNodes("//article"))
            {
                markdown += UnparseList("", article) + "\n";

                // The answer is the code element of the first following paragraph
                // that has a code child and mentions "answer".
                HtmlNode answerCode = null;
                for (var sibling = article.NextSibling; sibling != null; sibling = sibling.NextSibling)
                {
                    if (sibling.Name != "p")
                    {
                        continue;
                    }

                    var candidate = sibling.SelectSingleNode("./code");
                    if (candidate != null && sibling.InnerText.Contains("answer"))
                    {
                        answerCode = candidate;
                        break;
                    }
                }

                if (answerCode != null)
                {
                    collectedAnswers += answerCode.InnerText + "\n";
                }
            }

            var heading = HtmlEntity.DeEntitize(page.DocumentNode.SelectNodes("//h2").First().InnerText);

            // Keep only the part between ": " and " ---" when the heading has that shape.
            var titleMatch = Regex.Match(heading, ".*: (.*) ---");
            var title      = titleMatch.Success ? titleMatch.Groups[1].Value : heading;

            return(new Problem {
                Year = year, Day = day, Title = title, ContentMd = markdown, Input = input, Answers = collectedAnswers
            });
        }
コード例 #17
0
        /// <summary>
        /// Downloads the product page and adds one <c>Product</c> to <c>ProductsCollection</c>
        /// for every div whose class attribute equals the category's <c>CategoryUrl</c>.
        /// <c>IsLoading</c> is true for the duration of the call and reset afterwards,
        /// including when the download or parsing fails.
        /// </summary>
        /// <param name="category">Category whose <c>CategoryUrl</c> selects the product divs.</param>
        public async Task SetData(ProductsCategory category)
        {
            try
            {
                IsLoading = true;

                string html;

                // Dispose the client once the page has been fetched so its connection is released.
                using (var httpClient = new HttpClient())
                {
                    html = await httpClient.GetStringAsync(@"http://piranigroup.com.pk/product-2/");
                }

                var htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(html);

                var nodes = htmlDocument.DocumentNode.Descendants("div")
                    .Where(x => x.GetAttributeValue("class", "").Equals(category.CategoryUrl));

                foreach (var htmlNode in nodes)
                {
                    var product = new Product
                    {
                        // Name comes from the first h2, image from the src of the first img.
                        ProductName = HtmlEntity.DeEntitize(htmlNode.Descendants("h2").FirstOrDefault()?.InnerText),
                        ProductImage = HtmlEntity.DeEntitize(htmlNode.Descendants("img").FirstOrDefault()?
                                                         .ChildAttributes("src")
                                                         .FirstOrDefault()?
                                                         .Value)
                    };

                    ProductsCollection.Add(product);
                }
            }
            finally
            {
                // Exceptions propagate to the caller unchanged; only the flag is reset here.
                IsLoading = false;
            }
        }
コード例 #18
0
        /// <summary>
        /// Reads an album post: title, cover picture, info box and track list.
        /// Each part is optional and is skipped when its element is not found.
        /// </summary>
        /// <param name="doc">Parsed album page.</param>
        /// <param name="url">Address of the page, stored as the source URL.</param>
        /// <returns>Contract holding the title, the imported data, the cover picture and the source URL.</returns>
        private MikuDbAlbumContract GetAlbumData(HtmlDocument doc, string url)
        {
            var albumData = new ImportedAlbumDataContract();

            // Title: link inside the post heading.
            var titleLink = doc.DocumentNode.SelectSingleNode(".//h2[@class='posttitle']/a");
            string title  = titleLink != null ? HtmlEntity.DeEntitize(titleLink.InnerText) : string.Empty;

            // Cover: image in the first cell of the first row of the post table.
            var coverImage = doc.DocumentNode.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[1]/a/img");
            PictureDataContract coverPicture = coverImage == null
                ? null
                : DownloadCoverPicture(coverImage.Attributes["src"].Value);

            // Info box: second cell of the same row. The track list is searched for
            // starting from the row that follows the info box's row.
            var infoCell = doc.DocumentNode.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[2]");
            HtmlNode rowAfterInfo = null;

            if (infoCell != null)
            {
                ParseInfoBox(albumData, infoCell);
                rowAfterInfo = infoCell.ParentNode.NextSibling;
            }

            var tracksRow = FindTracklistRow(doc, rowAfterInfo);

            if (tracksRow != null)
            {
                ParseTrackList(albumData, tracksRow);
            }

            return(new MikuDbAlbumContract {
                Title = title, Data = albumData, CoverPicture = coverPicture, SourceUrl = url
            });
        }
コード例 #19
0
        /// <summary>
        /// Resolves a Bilibili video URL through the XML view API and returns its title,
        /// author and thumbnail.
        /// </summary>
        /// <param name="url">Video page URL.</param>
        /// <param name="getTitle">Not used by this implementation.</param>
        /// <returns>
        /// A success result, or an error result when no ID can be extracted from the URL,
        /// the API request fails with a <c>WebException</c>, or the response has no title element.
        /// </returns>
        public override VideoUrlParseResult ParseByUrl(string url, bool getTitle)
        {
            var id = GetIdByUrl(url);

            if (string.IsNullOrEmpty(id))
            {
                return(VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.NoMatcher, "No matcher"));
            }

            var apiRequest = WebRequest.Create(
                string.Format("http://api.bilibili.tv/view?type=xml&appkey={0}&id={1}", AppConfig.BilibiliAppKey, id));

            XDocument info;

            try {
                using (var response = apiRequest.GetResponse())
                    using (var body = response.GetResponseStream()) {
                        info = XDocument.Load(body);
                    }
            } catch (WebException x) {
                log.WarnException("Unable to load Bilibili URL " + url, x);
                var failure = new VideoParseException("Unable to load Bilibili URL: " + x.Message, x);
                return(VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.LoadError, failure));
            }

            // The title is mandatory; thumbnail and author fall back to empty strings.
            var titleElem = info.XPathSelectElement("/info/title");

            if (titleElem == null)
            {
                return(VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.LoadError, "No title element"));
            }

            var thumbElem  = info.XPathSelectElement("/info/pic");
            var authorElem = info.XPathSelectElement("/info/author");

            var title  = HtmlEntity.DeEntitize(titleElem.Value);
            var thumb  = string.Empty;
            var author = string.Empty;

            if (thumbElem != null)
            {
                thumb = thumbElem.Value;
            }

            if (authorElem != null)
            {
                author = authorElem.Value;
            }

            var titleResult = VideoTitleParseResult.CreateSuccess(title, author, thumb);

            return(VideoUrlParseResult.CreateOk(url, PVService.Bilibili, id, titleResult));
        }
コード例 #20
0
        /// <summary>
        /// Creates a gallery from the string-typed fields of an API response.
        /// </summary>
        /// <remarks>
        /// Every parameter except <paramref name="gid"/> is optional. When
        /// <paramref name="error"/> is set, it is reported before any other field is parsed,
        /// so an error response that omits the remaining fields surfaces the reported error
        /// rather than a parsing failure.
        /// </remarks>
        /// <exception cref="Exception"><paramref name="error"/> is not null; the message is the error text.</exception>
        internal Gallery(
            long gid,
            string error        = null,
            string token        = "0",
            string title        = null,
            string title_jpn    = null,
            string category     = null,
            string thumb        = null,
            string uploader     = null,
            string posted       = null,
            string filecount    = null,
            long filesize       = 0,
            bool expunged       = true,
            string rating       = null,
            string torrentcount = null,
            string[] tags       = null)
            : this(gid, EToken.Parse(token.CoalesceNullOrWhiteSpace("0")), ParseFileCountOrThrow(error, filecount))
        {
            Available = !expunged;
            Title     = HtmlEntity.DeEntitize(title);
            TitleJpn  = HtmlEntity.DeEntitize(title_jpn);

            // Unknown or missing category names map to Unspecified; a null key must not
            // reach the dictionary lookup, which rejects null keys.
            Category ca;
            if (category == null || !_CategoriesForRestApi.TryGetValue(category, out ca))
            {
                ca = Category.Unspecified;
            }

            Category            = ca;
            Uploader            = HtmlEntity.DeEntitize(uploader);
            Posted              = DateTimeOffset.FromUnixTimeSeconds(long.Parse(posted, NumberStyles.Integer, CultureInfo.InvariantCulture));
            FileSize            = filesize;
            Expunged            = expunged;
            Rating.AverageScore = double.Parse(rating, NumberStyles.Number, CultureInfo.InvariantCulture);
            TorrentCount        = int.Parse(torrentcount, NumberStyles.Integer, CultureInfo.InvariantCulture);

            // A missing tag array is treated as "no tags".
            Tags     = new TagCollection(this, (tags ?? new string[0]).Select(tag => Tag.Parse(tag)));
            ThumbUri = ThumbClient.FormatThumbUri(thumb);
        }

        /// <summary>
        /// Throws the reported error, if any, and otherwise parses the file count.
        /// Runs as part of the constructor initializer so the error check happens before parsing.
        /// </summary>
        private static int ParseFileCountOrThrow(string error, string filecount)
        {
            if (error != null)
            {
                throw new Exception(error);
            }

            return(int.Parse(filecount, NumberStyles.Integer, CultureInfo.InvariantCulture));
        }
コード例 #21
0
        /*
         *
         * public static string CleanText ( string Text )
         * {
         *
         * string CleanedText = "";
         *
         * if( !string.IsNullOrEmpty( Text ) )
         * {
         *
         *  CleanedText = Text;
         *
         *  CleanedText = Regex.Replace( CleanedText, @"<!.+?>", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"<!--.+?-->", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"[\s]+", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"(?<![\w\d])([^\p{L}\p{N}\p{Sc}]+)", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"([^\p{L}\p{N}\p{Sc}]+)(?![\w\d])", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"([\p{P}\p{Sc}]+)(?![\w\d])", " ", RegexOptions.Singleline );
         *  CleanedText = Regex.Replace( CleanedText, @"[\s]+", " ", RegexOptions.Singleline );
         *
         *  CleanedText = CleanedText.Trim();
         *
         * }
         *
         * return( CleanedText );
         *
         * }
         *
         */

        /**************************************************************************/

        /// <summary>
        /// Decodes HTML entities in <paramref name="Text"/>, collapses every run of whitespace
        /// into a single space and removes trailing whitespace.
        /// </summary>
        /// <param name="Text">Text to compact; null or empty input is returned unchanged.</param>
        /// <returns>The compacted text.</returns>
        public static string CompactWhiteSpace(string Text)
        {
            if (string.IsNullOrEmpty(Text))
            {
                return(Text);
            }

            string Decoded;

            try
            {
                Decoded = HtmlEntity.DeEntitize(Text);
            }
            catch (Exception ex)
            {
                // Best effort: log and carry on with the undecoded text.
                DebugMsgStatic(string.Format("CompactWhiteSpace: {0}", ex.Message));
                Decoded = Text;
            }

            string Collapsed = Regex.Replace(Decoded, @"[\s]+", " ", RegexOptions.Singleline);
            string Trimmed   = Regex.Replace(Collapsed, @"[\s]+$", "", RegexOptions.Singleline);

            // NOTE(review): the first pass already replaces CR/LF (both matched by \s) with a
            // space, so this pass appears to have nothing left to match.
            return(Regex.Replace(Trimmed, @"[\r\n]+", Environment.NewLine, RegexOptions.Singleline));
        }
コード例 #22
0
        /// <summary>
        /// Builds a <c>WorkAssignment</c> from one assignment element: the ID comes from its
        /// "uo" attribute, the placement from its titled div, and the start/end times from
        /// the "start - end" text of its b element combined with <paramref name="date"/>.
        /// </summary>
        /// <param name="assignment">HTML element describing the assignment.</param>
        /// <param name="date">Date text prepended to each time before parsing.</param>
        /// <returns>The parsed assignment.</returns>
        private WorkAssignment ParseWorkAssignmentFromHtml(HtmlNode assignment, string date)
        {
            var idMatch      = AssignmentIdRegex.Match(assignment.GetAttributeValue("uo", string.Empty));
            var assignmentId = idMatch.Groups["id"]?.Value;

            var placementNode = assignment.SelectSingleNode("div[@title]");
            var placement     = HtmlEntity.DeEntitize(placementNode.InnerText);

            // The time range is rendered as "<start> - <end>".
            var timeParts = assignment.SelectSingleNode("b").InnerText.Split(" - ");
            var startText = timeParts[0];
            var endText   = timeParts[1];

            var parsedStartTime = DateTime.Parse($"{date} {startText}");
            var parsedEndTime   = DateTime.Parse($"{date} {endText}");

            _logger.Info($"Found appointment. Date: {date}, ID: {assignmentId}, Placement: {placement}, Start: {parsedStartTime:t}, End: {parsedEndTime:t}");

            var result = new WorkAssignment
            {
                Id = assignmentId,
                Start = parsedStartTime,
                End = parsedEndTime,
                Placement = placement
            };

            return(result);
        }
コード例 #23
0
ファイル: Scrapper.cs プロジェクト: jirkaceska/C_sharp
        /// <summary>
        /// Scrapes the Al Capone menu table and saves one <c>DayMenu</c> per day.
        /// Each day occupies <c>rowPerDay</c> table rows: a date row, a soup row and
        /// the remaining rows holding one dish (description and price) each.
        /// </summary>
        /// <exception cref="WeekendEmptyException">The menu table has no body or no rows.</exception>
        private void LoadAlCapone()
        {
            const int rowPerDay  = 6;
            const int headerRows = 2;       // date row + soup row precede the dishes

            HtmlNode           doc  = Utils.GetHtmlDoc(Constants.alCaponeUrl).DocumentNode;
            HtmlNode           menu = doc.SelectSingleNode("//table[@class='table table-responsive']/tbody");
            // A missing table body is handled like an empty one rather than dereferencing null.
            HtmlNodeCollection rows = menu == null ? null : menu.SelectNodes("./tr");

            if (rows == null)
            {
                throw new WeekendEmptyException("Pizzeria Alcapone - Brno:\nV menu nejsou o víkendu žádné položky, vraťe se v pondělí.");
            }
            int daysCount = rows.Count / rowPerDay;

            DayMenu[] dayMenus = new DayMenu[daysCount];

            for (int i = 0; i < daysCount; i++)
            {
                int firstRow = i * rowPerDay;

                string   dateStr = rows[firstRow].SelectSingleNode("./td/h3").InnerText;
                DateTime date    = Utils.ParseDateTime(dateStr);

                // Decode entities in the soup name the same way as in the dish descriptions.
                string soup  = HtmlEntity.DeEntitize(rows[firstRow + 1].SelectSingleNode("./td[2]/h3").InnerText);
                // Size the array from the row layout so it always matches the loop below.
                Food[] foods = new Food[rowPerDay - headerRows];

                for (int j = headerRows; j < rowPerDay; j++)
                {
                    HtmlNode actRow = rows[firstRow + j];

                    string description = HtmlEntity.DeEntitize(actRow.SelectSingleNode("./td[2]/h3").InnerText);
                    int    price       = Utils.ParsePrice(
                        actRow.SelectSingleNode("./td[3]/h3").InnerText);
                    foods[j - headerRows] = new Food(description, price);
                }
                dayMenus[i] = new DayMenu(date, soup, foods);
            }

            string restaurantName = GetRestaurantName(doc);

            SaveRestaurant(restaurantName, dayMenus, Restaurants.AlCapone);
        }
コード例 #24
0
ファイル: HAP.cs プロジェクト: rfcclub/Voz
        /// <summary>
        /// Renders a quote block as text framed by the quote borders from <c>Model.Post</c>.
        /// Nested quotes are rendered recursively and replaced in the tree before the
        /// surrounding text is read.
        /// </summary>
        /// <param name="quote">Quote container node.</param>
        /// <param name="doc">Owning document, used to create replacement nodes for nested quotes.</param>
        /// <returns>
        /// The rendered quote. When the quote has no "alt2" cell, the top border followed by
        /// the quote's own decoded text is returned.
        /// </returns>
        public static string ProcessQuote(HtmlNode quote, HtmlDocument doc)
        {
            RemoveViewPost(quote);

            string quoteContent = Model.Post.GetQuoteBorderTop();

            // The quoted text lives in the "alt2" cell of the quote's table; either may be absent.
            HtmlNode table = quote.SelectSingleNode("./table");
            HtmlNode td    = table == null ? null : table.SelectSingleNode(".//td[@class='alt2']");

            if (td == null)
            {
                // No cell to read from, so fall back to the text of the quote node itself.
                quoteContent += HtmlEntity.DeEntitize(quote.InnerText.Trim()).Trim();
                return(quoteContent);
            }

            // Render nested quotes first and swap each for a node holding its rendered text.
            HtmlNodeCollection quotes = td.SelectNodes("./div[@style='margin:20px; margin-top:5px; ']");
            if (quotes != null)
            {
                foreach (HtmlNode q in quotes)
                {
                    string   s       = ProcessQuote(q, doc);
                    HtmlNode newNode = doc.CreateElement("title");
                    newNode.InnerHtml = HtmlDocument.HtmlEncode(s);
                    q.ParentNode.ReplaceChild(newNode, q);
                }
            }

            // Check whether the quote names its author.
            HtmlNode strongUserName = td.SelectSingleNode(".//strong");
            if (strongUserName != null)
            {
                // NOTE(review): the opening marker is the literal "******" while the closing
                // one is "</b>"; confirm this is the intended markup.
                string quoteUser = "******" + HtmlEntity.DeEntitize(strongUserName.InnerText.Trim()) + "</b>";
                quoteContent += quoteUser + "<br>";

                // Drop the first div so it is not repeated in the quoted text below.
                HtmlNode userDiv = td.Element("div");
                if (userDiv != null)
                {
                    td.RemoveChild(userDiv);
                }
            }

            quoteContent += HtmlEntity.DeEntitize(td.InnerText.Trim());
            quoteContent += Model.Post.GetQuoteBorderBottom();

            return(quoteContent);
        }
コード例 #25
0
        /// <summary>
        /// Loads a product page and reads its description, name, price and image links
        /// from the ".f-wrap" container.
        /// </summary>
        /// <param name="link">Path appended to <c>baseLink</c> to form the page address.</param>
        /// <returns>The populated product detail.</returns>
        public ProductDetail GetProductDetail(string link)
        {
            HtmlDocument page = HtmlWebSingleton.GetInstance().Load(baseLink + link);

            // All product fields are looked up inside this wrapper element.
            var wrapper = page.DocumentNode.QuerySelector(".f-wrap");

            var result = new ProductDetail
            {
                // The description markup is converted to plain text before entity decoding.
                Description = HtmlEntity.DeEntitize(
                    wrapper.QuerySelector(".fs-dtbox main_spec .fs-tsright >li").InnerHtml.InsertNewLine().RemoveHtmlTag()),
                ProductName = HtmlEntity.DeEntitize(wrapper.QuerySelector("h1").InnerText),
                Price       = wrapper.QuerySelector(".product-price span").InnerText
            };

            var sliderImages = wrapper.QuerySelectorAll("#slider1_container > div > div  img:first-child").ToList();

            sliderImages.ForEach(image => result.ImageLinkLst.Add(image.Attributes["src"].Value));

            return(result);
        }
コード例 #26
0
        /// <summary>
        /// Extracts the subjects listed as options of the <c>subj_id</c> select element.
        /// </summary>
        /// <param name="content">HTML markup of the page to parse.</param>
        /// <returns>
        /// One subject per option, with the code taken from the option's <c>VALUE</c> attribute and the
        /// name taken from the option text after its first '-'. Empty when the page has no matching options.
        /// </returns>
        public List <MyPurdueSubject> ParseHtml(string content)
        {
            var subjects = new List <MyPurdueSubject>();

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(content);

            HtmlNode           root            = document.DocumentNode;
            HtmlNodeCollection termSelectNodes = root.SelectNodes("//select[@id='subj_id'][1]/option");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (termSelectNodes == null)
            {
                return(subjects);
            }

            foreach (var node in termSelectNodes)
            {
                var code = HtmlEntity.DeEntitize(node.Attributes["VALUE"].Value).Trim();
                var name = HtmlEntity.DeEntitize(node.InnerText).Trim();

                // Keep what follows the first '-'; IndexOf gives -1 when there is none,
                // so the whole text is kept in that case.
                name = name.Substring(name.IndexOf('-') + 1);

                subjects.Add(new MyPurdueSubject()
                {
                    SubjectCode = code,
                    SubjectName = name
                });
            }
            return(subjects);
        }
コード例 #27
0
ファイル: UrlBot.cs プロジェクト: JnrnZEDb/IrcBot
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns its title.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The de-entitized content of the <c>meta name="title"</c> element if present, otherwise the trimmed
        /// text of the <c>title</c> element, otherwise "N/A". Returns null when the response is not
        /// <c>text/html</c> or when any exception occurs.
        /// </returns>
        private string GetTitle(string url)
        {
            try
            {
                var req = (HttpWebRequest)HttpWebRequest.Create(url);
                req.UserAgent         = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7";
                req.AllowAutoRedirect = true;

                // Dispose the response (and its stream) on every path, including the early
                // return and exceptions, so the underlying connection is released.
                using (var response = req.GetResponse())
                {
                    if (!response.ContentType.Contains("text/html"))
                    {
                        return(null);
                    }

                    var doc = new HtmlDocument();
                    using (var stream = response.GetResponseStream())
                    {
                        doc.Load(stream);
                    }

                    var    titleMeta = doc.DocumentNode.SelectSingleNode("//meta[@name='title']");
                    string title     = "N/A";

                    if (titleMeta != null)
                    {
                        title = titleMeta.Attributes["content"].Value;
                    }
                    else
                    {
                        var node = doc.DocumentNode.SelectSingleNode("//title");
                        if (node != null)
                        {
                            title = node.InnerText.Trim();
                        }
                    }

                    return(HtmlEntity.DeEntitize(title));
                }
            }
            catch (Exception)
            {
                // Best effort: any failure (network, parsing, missing attribute) means "no title".
                return(null);
            }
        }
コード例 #28
0
        /// <summary>
        /// Walks the children of an anchor node: images are passed to <c>ParseImage</c>, and non-blank
        /// text nodes are appended to the current paragraph as runs that link to the anchor's href
        /// when one is set.
        /// </summary>
        /// <param name="node">The anchor node whose children are processed.</param>
        private void ParseAnchor(HtmlNode node)
        {
            if (Logger.IsDebugEnabled)
            {
                Logger.Debug(string.Format("a: {0}, {1}", node.Name, node.InnerText));
            }

            string target        = null;
            var    hrefAttribute = node.Attributes["href"];
            if (hrefAttribute != null)
            {
                target = hrefAttribute.Value;
            }

            foreach (var childNode in node.ChildNodes)
            {
                // Checked on every iteration, exactly as before, so a paragraph exists for each child.
                if (_currentParagraph == null)
                {
                    _currentParagraph = CreateParagraph();
                }

                var tag = childNode.Name.ToLowerInvariant();

                if (tag == "img")
                {
                    ParseImage(childNode);
                    continue;
                }

                if (tag != "#text")
                {
                    continue;
                }

                var content = childNode.InnerText;
                if (StringUtil.IsNullOrWhitespace(content))
                {
                    continue;
                }

                var textRun = new Run(HtmlEntity.DeEntitize(content));
                if (!StringUtil.IsNullOrWhitespace(target))
                {
                    textRun.Link = new Link(target);
                }
                _currentParagraph.LineSegments.Last().InsertAfter(textRun);
            }
        }
コード例 #29
0
        /// <summary>
        /// Recursively collects the XPaths of "Times New Roman" style attributes on span nodes
        /// whose inner HTML is blank once entities are decoded.
        /// </summary>
        /// <param name="node">Root of the subtree to inspect.</param>
        /// <returns>XPaths of the matching attributes found in <paramref name="node"/> and its descendants.</returns>
        private static List <string> IdentifyNodesWithBulletPoints(HtmlNode node)
        {
            var found = new List <string>();

            if (node.Name.Contains("span"))
            {
                var fontStyles = node.Attributes
                                 .Where(attr => attr.Name.Contains("style") && attr.Value.Contains("Times New Roman"));

                foreach (var styleAttribute in fontStyles)
                {
                    var decoded = HtmlEntity.DeEntitize(node.InnerHtml);
                    if (!string.IsNullOrWhiteSpace(decoded))
                    {
                        continue;
                    }

                    found.Add(styleAttribute.XPath);
                }
            }

            // Descend into the children whether or not this node itself matched.
            foreach (var child in node.ChildNodes)
            {
                var nested = IdentifyNodesWithBulletPoints(child);
                found.AddRange(nested);
            }

            return(found);
        }
コード例 #30
0
        /// <summary>
        /// Returns the de-entitized content of every text node in <paramref name="html"/>, one per line.
        /// </summary>
        /// <param name="html">HTML markup to extract text from.</param>
        /// <returns>The collected text, or an empty string when no text nodes are found.</returns>
        private string getTextFromHtml(string html)
        {
            var document = new HtmlDocument();
            document.LoadHtml(html);

            if (document == null || document.DocumentNode == null)
            {
                return("");
            }

            var textNodes = document.DocumentNode.SelectNodes("//text()");
            if (textNodes == null)
            {
                return("");
            }

            var buffer = new StringBuilder();
            foreach (HtmlTextNode textNode in textNodes)
            {
                var decoded = HtmlEntity.DeEntitize(textNode.Text);
                buffer.AppendLine(decoded);
            }

            return(buffer.ToString());
        }