SelectNodes() public method

Selects a list of nodes matching the XPath expression.
public SelectNodes ( string xpath ) : HtmlAgilityPack.HtmlNodeCollection
xpath string The XPath expression.
return HtmlAgilityPack.HtmlNodeCollection
        /// <summary>
        /// Builds a <see cref="EuroMillionsResult"/> from one result section of the page.
        /// </summary>
        /// <param name="section">Node that contains the draw-date link and the ball cells.</param>
        /// <returns>
        /// A result made of the parsed date, a constant 0 as second argument, the main balls and the bonus balls.
        /// A section without ball cells yields empty lists.
        /// </returns>
        /// <exception cref="InvalidOperationException">The draw-date link is missing from the section.</exception>
        /// <exception cref="FormatException">The date or a ball number cannot be parsed.</exception>
        private EuroMillionsResult ParseResultSection(HtmlNode section)
        {
            var dateNode = section.SelectSingleNode(".//div[@class = 'floatLeft']/a");
            if (dateNode == null)
            {
                throw new InvalidOperationException("Result section does not contain a draw date link.");
            }

            // Surrounding whitespace in the markup would otherwise make ParseExact fail.
            var date = DateTime.ParseExact(dateNode.InnerText.Trim(), "dd/MM/yyyy", CultureInfo.InvariantCulture);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var ballCells = section.SelectNodes(".//td[@class = 'euro-ball-s']") ?? Enumerable.Empty<HtmlNode>();
            var bonusCells = section.SelectNodes(".//td[@class = 'euro-lucky-star-s']") ?? Enumerable.Empty<HtmlNode>();

            var balls = ballCells.Select(x => Convert.ToInt32(x.InnerText, CultureInfo.InvariantCulture)).ToList();
            var bonusBalls = bonusCells.Select(x => Convert.ToInt32(x.InnerText, CultureInfo.InvariantCulture)).ToList();

            return new EuroMillionsResult(date, 0, balls, bonusBalls);
        }
Ejemplo n.º 2
1
        /// <summary>
        /// Applies <paramref name="value"/> to a form control node.
        /// </summary>
        /// <param name="n">The node to update; only <c>select</c> and <c>input</c> elements are handled.</param>
        /// <param name="value">The value to select, check or store.</param>
        /// <returns>
        /// <c>true</c> when the node is a <c>select</c> or <c>input</c> element and was processed;
        /// <c>false</c> for null or any other node.
        /// </returns>
        public bool SetValue(HtmlNode n, string value)
        {
            if (n == null)
            {
                return false;
            }

            if (n.Name == "select")
            {
                // SelectNodes returns null for a select without option children.
                var options = n.SelectNodes("option");
                if (options != null)
                {
                    foreach (HtmlNode o in options)
                    {
                        // NOTE(review): a present-but-empty "selected" attribute still counts as selected
                        // in HTML; kept as-is because other code may read the attribute value — confirm.
                        o.SetAttributeValue("selected", o.GetAttributeValue("value", "").Equals(value) ? "selected" : "");
                    }
                }
                return true;
            }

            if (n.Name == "input")
            {
                switch (n.GetAttributeValue("type", ""))
                {
                    case "radio":
                        // A radio button keeps its own value; only its checked state changes.
                        n.SetAttributeValue("checked", n.GetAttributeValue("value", "").Equals(value) ? "checked" : "");
                        break;
                    default:
                        n.SetAttributeValue("value", value);
                        break;
                }
                return true;
            }

            return false;
        }
        /// <summary>
        /// Collects channel items from every article element below <paramref name="node"/>.
        /// Playable articles are added first, then the remaining (folder) articles.
        /// </summary>
        /// <param name="node">Root node to search; null yields an empty list.</param>
        /// <param name="abroadOnly">Passed through to <c>ParsePlayableArticle</c>.</param>
        /// <returns>The non-null items produced by the two article parsers.</returns>
        internal static List<ChannelItemInfo> ParseNode(HtmlNode node, bool abroadOnly)
        {
            var result = new List<ChannelItemInfo>();

            if (node != null)
            {
                // Articles carrying one of the two "playable" marker classes.
                var playableNodes = node.SelectNodes(".//article[contains(@class, 'playJsInfo-Core') or contains(@class, 'slick_item')]");
                if (playableNodes != null)
                {
                    foreach (var playableNode in playableNodes)
                    {
                        var item = ParsePlayableArticle(playableNode, abroadOnly);
                        if (item != null)
                        {
                            result.Add(item);
                        }
                    }
                }

                // Every other article is treated as a folder.
                var folderNodes = node.SelectNodes(".//article[not(contains(@class, 'playJsInfo-Core') or contains(@class, 'slick_item'))]");
                if (folderNodes != null)
                {
                    foreach (var folderNode in folderNodes)
                    {
                        var item = ParseFolderArticle(folderNode);
                        if (item != null)
                        {
                            result.Add(item);
                        }
                    }
                }
            }

            return result;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Posts a search request, parses the returned HTML and writes one
        /// "name—count—price" entry per result block to the response.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // NOTE(review): the header block embeds a captured session cookie; consider moving it to configuration.
            string heads = @"Accept: application/json, text/javascript, */* q=0.01 " +
                           @"Accept-Encoding: gzip, deflate " +
                           @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " +
                           @"Connection: keep-alive " +
                           @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " +
                           @"Host: catdot.dianping.com " +
                           @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " +
                           @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";
            string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE";
            ClassHttpRequestClient s   = new ClassHttpRequestClient(true);
            HtmlDocument           doc = new HtmlDocument();
            string content             = "";
            string response            = s.httpPost(url, heads, content, Encoding.UTF8);

            // The response must be loaded into the document before it can be queried;
            // without this the XPath query below never matches anything.
            doc.LoadHtml(response ?? string.Empty);

            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            StringBuilder sb = new StringBuilder();

            // SelectNodes returns null when nothing matches.
            if (collection != null)
            {
                foreach (HtmlAgilityPack.HtmlNode item in collection)
                {
                    // NOTE(review): this looks for a div.txt child inside a div.txt; the variable name
                    // suggests the title div may use a different class — confirm against the page markup.
                    HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"txt\"]");
                    HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                    if (divtit == null || divcomment == null)
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");
                    if (aname == null || anum == null || aprice == null)
                    {
                        continue;
                    }

                    sb.Append(string.Format("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText));
                }
            }

            Response.Write(sb);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Converts the movie entries selected by <paramref name="xPath"/> into <see cref="Movie"/> objects.
        /// </summary>
        /// <param name="htmlNode">Node the XPath expressions are evaluated against.</param>
        /// <param name="xPath">XPath that selects one node per movie.</param>
        /// <returns>The parsed movies, or <c>null</c> when the XPath selects nothing.</returns>
        /// <remarks>
        /// Exceptions raised while reading an entry propagate unchanged. The previous
        /// <c>catch</c>/<c>throw e</c> wrapper only reset the stack trace and has been removed.
        /// </remarks>
        private List<Movie> ParseMovieListHtml(HtmlNode htmlNode, string xPath)
        {
            var hnc = htmlNode.SelectNodes(xPath);

            // SelectNodes returns null when nothing matches; keep the existing "no result => null" contract.
            if (hnc == null || hnc.Count < 1)
                return null;

            // NOTE(review): this XPath is absolute, and the images are paired with the movie nodes purely by index.
            // A mismatch in counts still throws, as before — confirm the pairing is reliable.
            var hnc2 = htmlNode.SelectNodes("//div/a/img");

            var movies = new List<Movie>();
            for (int i = 0; i < hnc.Count; i++)
            {
                var node1 = hnc2[i];
                var node = hnc[i];
                var movie = new Movie();
                var hac = node1.Attributes;

                // The description rows are looked up once per movie instead of once per field.
                var descRows = node.SelectNodes("div[@class='fm-movie-desc']/div");

                movie.Grade = node.SelectSingleNode("div[@class='fm-movie-desc']/div/span[@class='fm-rating']").InnerText.Replace("\n", "").RemoveSpace().Trim();
                movie.Image = hac[0].Value.Replace("-poster100", "").Trim();
                movie.Name = descRows[0].InnerText.Replace("\n", "").RemoveSpace().Trim();
                movie.Director = descRows[2].InnerText.Replace("\n", "").RemoveSpace().Trim();
                movie.Story = descRows[3].InnerText.Replace("\n", "").RemoveSpace().Trim();
                movie.Actor = descRows[4].InnerText.Replace("\n", "").RemoveSpace().Trim();
                movie.DetailUrl = node.SelectSingleNode("div[@class='fm-movie-cover']/a").Attributes["href"].Value.Trim();
                movies.Add(movie);
            }
            return movies;
        }
        /// <summary>
        /// Initializes the entry's url, title and imageUrl from a result node.
        /// Each value is only assigned when its source element is found.
        /// </summary>
        /// <param name="node">Node containing the link, title span and thumbnail image.</param>
        public YoutubeVideoEntry(HtmlNode node)
        {
            // Link: keep only the part of the href that precedes the first '&'.
            var anchors = node.SelectNodes(".//a[@href]");
            if (anchors != null)
            {
                string href = anchors.FirstOrDefault().Attributes["href"].Value;
                int ampersandAt = href.IndexOf("&");

                // NOTE(review): an href without '&' (or starting with it) leaves url unset — confirm this is intended.
                if (ampersandAt > 0)
                {
                    url = "http://www.youtube.com" + href.Substring(0, ampersandAt);
                }
            }

            // Title: text of the first span whose class contains 'video-title', trimmed when non-empty.
            var titleSpans = node.SelectNodes(".//span[contains(@class, 'video-title')]");
            if (titleSpans != null)
            {
                title = titleSpans.FirstOrDefault().InnerText;
            }

            if (!String.IsNullOrEmpty(title))
            {
                title = title.Trim();
            }

            // Thumbnail: the src value is prefixed with "http:".
            var images = node.SelectNodes(".//img[@src]");
            if (images != null)
            {
                imageUrl = "http:" + images.FirstOrDefault().Attributes["src"].Value;
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Returns the text of the first bold element (<c>strong</c> or <c>b</c>) inside an
 /// 'entry-content' div and removes that element from the document.
 /// </summary>
 /// <param name="node">Node the XPath is evaluated against (the expression itself is document-absolute).</param>
 /// <returns>The element's inner text, or an empty string when no such element exists.</returns>
 protected override string retrieveTitle(HtmlNode node)
 {
     // Evaluate the XPath union once and reuse the node for both reading and removal.
     HtmlNode heading = node.SelectNodes("//div[contains(@class, 'entry-content')]//strong"
         + "|//div[contains(@class, 'entry-content')]//b")?.First();
     if (heading == null)
     {
         return "";
     }

     string title = heading.InnerText ?? "";
     heading.Remove();
     return title;
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Evaluates every attribute of the rule against <paramref name="node"/> and appends one
        /// (attribute id, value) pair per attribute to <c>res</c>.
        /// </summary>
        /// <param name="node">Node to query; when null a new empty list is returned.</param>
        /// <returns><c>res</c> after all attributes have been processed.</returns>
        private List<KeyValuePair<string, object>> run(HtmlNode node)
        {
            Factory.Instance.iInfo(string.Format("Running xpathSingle id : {0}", rule.id));

            if (node == null)
            {
                return new List<KeyValuePair<string, object>>();
            }

            foreach (Db.xpathSingleAttributes spec in rule.attributes)
            {
                object extracted = null;

                if (spec.getType == Db.xpathSingleAttributesGetType.nodeCollection)
                {
                    // Raw collection (null when nothing matches).
                    extracted = node.SelectNodes(spec.xpath);
                }
                else if (spec.getType == Db.xpathSingleAttributesGetType.count)
                {
                    // Number of matches as a string; stays null when nothing matches.
                    HtmlNodeCollection matches = node.SelectNodes(spec.xpath);
                    if (matches != null)
                    {
                        extracted = matches.Count.ToString();
                    }
                }
                else
                {
                    HtmlNode match = node.SelectSingleNode(spec.xpath);
                    if (match != null)
                    {
                        if (spec.getType == Db.xpathSingleAttributesGetType.singleNode)
                        {
                            extracted = match;
                        }
                        else
                        {
                            // Text-like results: start from an empty string and post-process it.
                            string raw = string.Empty;

                            if (spec.getType == Db.xpathSingleAttributesGetType.text)
                            {
                                raw = match.InnerText.Trim();
                            }

                            if (spec.getType == Db.xpathSingleAttributesGetType.html)
                            {
                                raw = match.InnerHtml.Trim();
                            }

                            if (spec.getType == Db.xpathSingleAttributesGetType.attribute)
                            {
                                var namedAttribute = match.Attributes[spec.attributeName];
                                if (namedAttribute != null)
                                {
                                    raw = namedAttribute.Value;
                                }
                            }

                            extracted = postProcessResult(raw, spec);

                            // HTML and node results are not logged.
                            bool loggable = spec.getType != Db.xpathSingleAttributesGetType.html
                                && spec.getType != Db.xpathSingleAttributesGetType.nodeCollection
                                && spec.getType != Db.xpathSingleAttributesGetType.singleNode;
                            if (loggable)
                            {
                                Factory.Instance.iInfo(string.Format("{0} = {1}", spec.id, extracted));
                            }
                        }
                    }
                }

                res.Add(new KeyValuePair<string, object>(spec.id, extracted));
            }

            return res;
        }
 /// <summary>
 /// Returns the dish nodes found under <paramref name="dishTypeNode"/>, extended with any
 /// <c>li</c> nodes parsed out of the related script element.
 /// </summary>
 /// <param name="dishTypeNode">Node the dish XPath and the script XPath are evaluated against.</param>
 /// <returns>
 /// An empty collection when no dish nodes are found; otherwise the dish collection,
 /// with script-provided <c>li</c> nodes appended when available.
 /// </returns>
 public HtmlNodeCollection GetDishInfoList(HtmlNode dishTypeNode)
 {
     var collectionSite = new BaseCollectionSite(PageUrl);
     var dishes = dishTypeNode.SelectNodes(DishesPath());
     bool hasDishes = dishes != null && dishes.Count > 0;
     if (!hasDishes)
     {
         return new HtmlNodeCollection(null);
     }

     // Further items are embedded as markup inside a script element of the active tab.
     var scriptNode = dishTypeNode.SelectSingleNode(@"./../../../../..//div[@class='rec-dishes tab-item active']/div[@class='pic-list J_toggle']/ul/script");
     if (scriptNode == null || string.IsNullOrWhiteSpace(scriptNode.InnerText))
     {
         return dishes;
     }

     var embeddedRoot = collectionSite.BaseHtmlNodeCollection(scriptNode.InnerText);
     if (embeddedRoot == null)
     {
         return dishes;
     }

     var embeddedItems = embeddedRoot.SelectNodes(".//li");
     if (embeddedItems == null)
     {
         return dishes;
     }

     foreach (var embeddedItem in embeddedItems)
     {
         dishes.Add(embeddedItem);
     }

     return dishes;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Runs <paramref name="xpath"/> against <paramref name="node"/> and never returns null.
        /// </summary>
        /// <param name="node">Node to query; validated with <c>Requires.NonNull</c>.</param>
        /// <param name="xpath">XPath expression to evaluate.</param>
        /// <returns>The matching nodes, or a new empty collection created for <paramref name="node"/> when the query returns null.</returns>
        public static H.HtmlNodeCollection SelectNodesOrEmpty(this H.HtmlNode node, string xpath)
        {
            Requires.NonNull(node, nameof(node));

            H.HtmlNodeCollection matches = node.SelectNodes(xpath);
            if (matches == null)
            {
                return new H.HtmlNodeCollection(node);
            }

            return matches;
        }
        /// <summary>
        /// Reads the status of every server listed in a region element and stores one
        /// poll value for each server that matches a known category.
        /// </summary>
        /// <param name="region">Region node containing an <c>h2</c> title and the server divs.</param>
        private void ParseRegionElement(HtmlNode region)
        {
            var regionTitle = region.SelectSingleNode("h2").InnerText;

            // SelectNodes returns null when the region lists no servers.
            var servers = region.SelectNodes(".//div[@class=\"server\" or @class=\"server alt\"]");
            if (servers == null)
                return;

            foreach (var server in servers)
            {
                var serverName = server.SelectSingleNode(".//div[@class=\"server-name\"]").InnerText.Trim();

                // Region is compared case-insensitively, server category case-sensitively.
                var possibleCategoryMatch = Categories.FirstOrDefault(p => string.Compare(p.Region, regionTitle, true) == 0 && string.Compare(p.ServerCategory, serverName) == 0);
                if (possibleCategoryMatch == null)
                    continue;

                // Only allocate the value once a matching category is known.
                var pollCategoryValue = new PollCategoryValue();
                pollCategoryValue.CategoryID = possibleCategoryMatch.PollCategoryID;
                pollCategoryValue.Status = PollStatusType.Unknown;
                pollCategoryValue.CreatedTime = DateTime.Now;

                // The status stays Unknown when the server has no child divs or no status icon.
                var childDivs = server.SelectNodes("div");
                if (childDivs != null)
                {
                    foreach (var div in childDivs)
                    {
                        if (div.OuterHtml.Contains("status-icon"))
                        {
                            pollCategoryValue.Status = div.OuterHtml.Contains("status-icon up") ? PollStatusType.Up : PollStatusType.Down;
                        }
                    }
                }

                DB.InsertPollCategoryValue(pollCategoryValue);
            }
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Builds one <see cref="FormElement"/> per <c>option</c> descendant of <paramref name="htmlNode"/>.
 /// </summary>
 /// <param name="htmlNode">Node to search for option elements.</param>
 /// <returns>The options in document order; empty when there are none.</returns>
 public List<FormElement> GetOptions(HtmlNode htmlNode)
 {
     List<FormElement> options = new List<FormElement>();
     HtmlNodeCollection nodeTags = htmlNode.SelectNodes(@".//option");
     if (nodeTags != null)
     {
         foreach (HtmlNode node in nodeTags)
         {
             // The label is read from the node following the option. When the option is the
             // last node there is no sibling, so fall back to the option's own text.
             HtmlNode labelNode = node.NextSibling;

             FormElement el = new FormElement();
             el.Id = node.GetAttributeValue("id", "");
             el.Type = "option";
             el.Name = labelNode != null ? labelNode.InnerText : node.InnerText;
             el.Value = node.GetAttributeValue("value", "");
             // The presence of the attribute alone marks the option as selected.
             el.Checked = node.Attributes["selected"] != null;
             options.Add(el);
         }
     }
     return options;
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Adds one sub-category per <c>article</c> element under <paramref name="node"/>, plus a
        /// next-page category when a 'More shows' link is present.
        /// </summary>
        /// <param name="node">Node to search for articles and the next-page link.</param>
        /// <param name="parentCat">Category that receives the sub-categories.</param>
        /// <returns>The total number of sub-categories of <paramref name="parentCat"/> afterwards.</returns>
        private int AddSubcats(HtmlNode node, RssLink parentCat)
        {
            var subs = node.SelectNodes(".//article");

            // SelectNodes returns null when there are no articles; the next-page handling
            // and the discovered flag below must still run in that case.
            if (subs != null)
            {
                foreach (var sub in subs)
                {
                    RssLink subcat = new RssLink()
                    {
                        ParentCategory = parentCat
                    };
                    subcat.Name  = HttpUtility.HtmlDecode(sub.SelectSingleNode(".//a[@title]").Attributes["title"].Value.Trim());
                    subcat.Url   = FormatDecodeAbsolutifyUrl(parentCat.Url, sub.SelectSingleNode(".//a[@href]").Attributes["href"].Value, null, UrlDecoding.None);
                    subcat.Thumb = getThumb(sub.SelectSingleNode(".//picture/img"));

                    parentCat.SubCategories.Add(subcat);
                }
            }

            var np = node.SelectSingleNode(".//a[@href and text()='More shows']");

            nextPageAvailable = false;
            if (np != null)
            {
                string url   = CreateUrl(parentCat.Url, np.Attributes["href"].Value);
                var    npCat = new NextPageCategory()
                {
                    Url = url, ParentCategory = parentCat
                };
                parentCat.SubCategories.Add(npCat);
            }

            parentCat.SubCategoriesDiscovered = true;
            return(parentCat.SubCategories.Count);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Maps the <c>td</c> cells of one table row onto a new <see cref="Nhl_Games_Rtss"/>.
        /// </summary>
        /// <param name="row">Table row whose direct <c>td</c> children are read by position (0-12).</param>
        /// <param name="nhlSeasonType">Season type stored on the model.</param>
        /// <returns>The populated model.</returns>
        private static Nhl_Games_Rtss MapHtmlRowToModel(HtmlNode row, NhlSeasonType nhlSeasonType)
        {
            HtmlNodeCollection cells = row.SelectNodes(@"./td");

            var game = new Nhl_Games_Rtss
            {
                NhlSeasonType = nhlSeasonType,
                // Apostrophes in the date cell are turned into slashes before conversion.
                Date = Convert.ToDateTime(cells[0].InnerText.Replace("'", "/"))
            };

            // The season year is derived from the date that was just stored.
            game.Year = NhlModelHelper.GetSeason(game.Date).Item2;

            // Cells 1-3: game number and teams.
            game.GameNumber = Convert.ToInt32(cells[1].InnerText);
            game.Visitor = cells[2].InnerText;
            game.Home = cells[3].InnerText;

            // Cells 4-12: report links.
            game.RosterLink = NhlGamesRtss.ParseLinkFromTd(cells[4]);
            game.GameLink = NhlGamesRtss.ParseLinkFromTd(cells[5]);
            game.EventsLink = NhlGamesRtss.ParseLinkFromTd(cells[6]);
            game.FaceOffsLink = NhlGamesRtss.ParseLinkFromTd(cells[7]);
            game.PlayByPlayLink = NhlGamesRtss.ParseLinkFromTd(cells[8]);
            game.ShotsLink = NhlGamesRtss.ParseLinkFromTd(cells[9]);
            game.HomeToiLink = NhlGamesRtss.ParseLinkFromTd(cells[10]);
            game.VistorToiLink = NhlGamesRtss.ParseLinkFromTd(cells[11]);
            game.ShootoutLink = NhlGamesRtss.ParseLinkFromTd(cells[12]);

            return game;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Adds a new package to <paramref name="app"/> and fills its title and prices
        /// from <paramref name="packageNode"/>.
        /// </summary>
        /// <param name="app">App that receives the new package.</param>
        /// <param name="packageNode">Node describing a single package.</param>
        /// <remarks>
        /// All XPath expressions are relative (<c>.//</c>). An expression starting with <c>//</c> is
        /// evaluated from the document root, so it would return the first match of the whole
        /// document rather than the data inside <paramref name="packageNode"/>.
        /// </remarks>
        private static void AddPackage(SteamApp app, HtmlNode packageNode)
        {
            var package = app.AddNewPackage();

            var packageTitleNode = packageNode.SelectSingleNode($".//{PackageTitle}");

            package.Title = packageTitleNode.InnerHtml.Replace("Buy ", "").Trim();

            var priceNodes = packageNode.SelectNodes($".//div[@class='{PackagePriceXPath}']");

            if (priceNodes != null)
            {
                // Regular price: current and original price are the same.
                var priceNode = priceNodes[0];

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(priceNode);

                package.OriginalPrice = package.CurrentPrice;
            }
            else
            {
                // No regular price element: read the original and discounted prices separately.
                var originalPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageOriginalPriceXPath}']");

                package.OriginalPrice = ParseNodeWithCurrencyToDecimal(originalPriceNode);

                var discountPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageDiscountPriceXPath}']");

                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(discountPriceNode);
            }
        }
        /// <summary>
        /// Parses the rows of the deals result table into <see cref="Flight"/> objects.
        /// </summary>
        /// <param name="documentNode">Node the table XPath is evaluated against.</param>
        /// <returns>One flight per table row that has child nodes; empty when the table is not found.</returns>
        public IList<Flight> ParseFlights(HtmlNode documentNode)
        {
            var rows = documentNode.SelectNodes("//table[@class='resultTable dealsResults']/tbody//tr[position()>1]");

            var parsed = new List<Flight>();
            if (rows == null)
            {
                return parsed;
            }

            foreach (var row in rows)
            {
                if (!row.HasChildNodes)
                {
                    continue;
                }

                // Values are read from fixed child positions of the row.
                var departureAirport = row.ChildNodes[1].InnerText;
                var destination = row.ChildNodes[3].InnerText;
                var departureDate = row.ChildNodes[5].SelectSingleNode("ul/li").InnerText;
                var returnDate = row.ChildNodes[5].SelectSingleNode("ul/li[position()>1]").InnerText;
                var departFlightTime = row.ChildNodes[7].SelectSingleNode("ul/li/ul/li").InnerText;
                var returnFlightTime = row.ChildNodes[7].SelectSingleNode("ul/li[position()>1]/ul/li").InnerText;
                var noOfNights = row.ChildNodes[9].InnerText;
                var departureAirportCode = row.ChildNodes[13].SelectSingleNode("fieldset/input[@id='depAP']").GetAttributeValue("value", "N/a");
                var arrivalAirportCode = row.ChildNodes[13].SelectSingleNode("fieldset/input[@id='retAP']").GetAttributeValue("value", "N/a");

                // The seat count is only read when the seats cell has more than two child nodes.
                string seats;
                if (row.SelectSingleNode("td[@class='seatsLeft']").ChildNodes.Count > 2)
                {
                    seats = row.SelectSingleNode("td[@class='seatsLeft']/div").InnerText;
                }
                else
                {
                    seats = "0";
                }

                // NOTE(review): ArrivalDate is built from the departure values and DepartureDate from the
                // return values — confirm this mapping is intended.
                parsed.Add(new Flight
                {
                    DepartureAirport = new Airport { Code = departureAirportCode, Name = departureAirport },
                    ArrivalAirport = new Airport { Code = arrivalAirportCode, Name = destination },
                    ArrivalDate = (departureDate + " " + departFlightTime + ":00").ToFormattedDateString(),
                    SeatsLeft = seats.ToInt32(),
                    DepartureDate = (returnDate + " " + returnFlightTime + ":00").ToFormattedDateString(),
                    NoOfNights = noOfNights.ToInt32()
                });
            }

            return parsed;
        }
Ejemplo n.º 17
0
    /// <summary>
    /// Extracts menu entries from the first <c>td</c> under <paramref name="node"/> and appends them
    /// to <c>menu.menu</c>.
    /// </summary>
    /// <param name="menu">Menu that receives the entry texts.</param>
    /// <param name="node">Node containing the table cells.</param>
    /// <remarks>
    /// Nothing is added when there is no <c>td</c> or the first cell contains no <c>div</c>.
    /// With more than two <c>br</c> elements the cell is re-split on <c>&lt;br&gt;</c> into divs;
    /// otherwise one div text is added per <c>br</c> element.
    /// </remarks>
    public static void Classification(Menu menu, agi.HtmlNode node)
    {
        agi.HtmlNodeCollection divide_td = node.SelectNodes(".//td");
        if (divide_td == null)
        {
            return;
        }

        agi.HtmlNode firstCell = divide_td[0];
        agi.HtmlNodeCollection check_div = firstCell.SelectNodes(".//div");
        if (check_div == null)
        {
            return;
        }

        // SelectNodes returns null when the cell has no <br>; treat that as zero.
        agi.HtmlNodeCollection check_br = firstCell.SelectNodes(".//br");
        int count = check_br == null ? 0 : check_br.Count;

        if (count > 2)
        {
            // Turn every <br> into a div boundary, then read the resulting divs.
            String text = firstCell.InnerHtml;
            text = text.Replace("<br>", "</div><div>");
            firstCell.InnerHtml = text;
            agi.HtmlNodeCollection tmp = firstCell.SelectNodes(".//div");
            if (tmp != null)
            {
                for (int i = 0; i < tmp.Count; i++)
                {
                    menu.menu.Add(tmp[i].InnerText);
                }
            }
        }
        else
        {
            // One entry per <br>, never reading past the available divs.
            for (int i = 0; i < count && i < check_div.Count; i++)
            {
                menu.menu.Add(check_div[i].InnerText);
            }
        }
    }
Ejemplo n.º 18
0
        /// <summary>
        /// Reads the options of every <c>select</c> element with the given name into a list of items.
        /// </summary>
        /// <param name="html">Node the XPath is evaluated against (the expression is document-absolute).</param>
        /// <param name="nameSelect">Value of the select element's <c>name</c> attribute.</param>
        /// <returns>
        /// One item per option with a non-empty <c>value</c> attribute; empty when no select matches.
        /// </returns>
        /// <exception cref="FormatException">An option value is not an integer.</exception>
        private static List<Item> extractSelect(HtmlNode html, string nameSelect)
        {
            List<Item> country = new List<Item>();

            var criterioBusqueda = "//select[@name='"+ nameSelect +"']";

            // SelectNodes returns null when no select element matches.
            HtmlNodeCollection selects = html.SelectNodes(criterioBusqueda);
            if (selects == null)
            {
                return country;
            }

            foreach (HtmlNode item in selects)
            {
                // The option labels are taken from the select's text, one label per line.
                string[] valueText = item.InnerText.Split('\n');

                List<HtmlNode> values = item.Elements("option").ToList();

                for (int i = 0; i < values.Count; i++)
                {
                    var aux = values[i].GetAttributeValue("value", "");
                    if (!aux.Equals(""))
                    {
                        int parse = int.Parse(aux);

                        // Note: on the sample page there is an empty element at the start, hence the +1 offset.
                        // This may not hold for every page.
                        string value = valueText[i + 1];

                        country.Add(new Item(parse, value));
                    }
                }
            }
            return country;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Joins the text of the child nodes of every paragraph in the article body with single spaces.
        /// </summary>
        /// <param name="docNode">Node the XPath is evaluated against (the expression is document-absolute).</param>
        /// <returns>The joined text, or an empty string when the article body has no paragraphs.</returns>
        private static string GetBody(HtmlNode docNode)
        {
            var paragraphs = docNode.SelectNodes("//div[@class='delfi-article-body']//p");

            // SelectNodes returns null when nothing matches.
            if (paragraphs == null)
            {
                return String.Empty;
            }

            var text = String.Join(" ", paragraphs.Elements().Select(e => e.InnerText));
            return text;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Searches below <paramref name="baseNode"/> using the text of the search box, either as an
        /// XPath expression or as a descendant tag name, and lists the matches in the results tab.
        /// </summary>
        /// <param name="baseNode">Node the search starts from.</param>
        private void SearchFromNode(HtmlNode baseNode)
        {
            // Make sure the document has been parsed before searching.
            if (!_html.DocumentNode.HasChildNodes)
            {
                ParseHtml();
            }

            var matches = chkXPath.IsChecked == true
                ? baseNode.SelectNodes(txtSearchTag.Text)
                : baseNode.Descendants(txtSearchTag.Text);

            // An XPath query without matches yields null; the existing results are left untouched.
            if (matches == null)
            {
                return;
            }

            listResults.Items.Clear();

            foreach (var match in matches)
            {
                var tree = new NodeTreeView { BaseNode = match };

                var header = new Label
                {
                    Content = string.Format("id:{0} name:{1} children{2}", match.Id, match.Name, match.ChildNodes.Count),
                    FontWeight = FontWeights.Bold
                };

                var panel = new StackPanel();
                panel.Children.Add(header);
                panel.Children.Add(tree);

                listResults.Items.Add(new ListBoxItem { Content = panel });
            }

            tabControl1.SelectedItem = tabSearchResults;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Replaces every node matched by the page type's macro XPaths with a text node holding a
        /// new property's template reference, and registers each property as a macro definition.
        /// </summary>
        /// <param name="skeleton">Skeleton node that is searched and modified in place.</param>
        /// <param name="pageType">Page type that supplies the macro XPaths and receives the definitions.</param>
        private void IdentifyMacros(HtmlNode skeleton, PageType pageType)
        {
            var menuProperties = new List<PropertyDTO>();
            var doc = skeleton.OwnerDocument;
            var propertyFactory = factory.PropertyFactory;

            // SelectNodes returns null when an XPath matches nothing; SelectMany would throw on that,
            // so a missing match is treated as an empty sequence.
            foreach (var menuNode in pageType.MacroXpaths
                .SelectMany(xpath => (IEnumerable<HtmlNode>)skeleton.SelectNodes(xpath) ?? Enumerable.Empty<HtmlNode>())
                .Where(n => n != null && n.ParentNode != null))
            {
                var property = propertyFactory.GetNew();
                menuProperties.Add(property);

                var propertyNode = doc.CreateTextNode(property.TemplateReference);
                menuNode.ParentNode.ReplaceChild(propertyNode, menuNode);
            }

            var macros = menuProperties.Select(p => new Definition
            {
                Number = p.Number,
                Name = p.Name,
                TemplateReference = p.TemplateReference,
                IsMacro = true
            });
            pageType.Definitions.AddRange(macros);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Creates a <see cref="ProductBasicData"/> from the attributes and child elements of a product node.
        /// </summary>
        /// <param name="productNode">Node carrying the product attributes and the image, price and link elements.</param>
        /// <returns>The populated product data.</returns>
        private ProductBasicData BuildProductBasicData(HtmlNode productNode)
        {
            log.DebugFormat("[BuildProductBasicData] OuterHtml= {0}.", productNode.OuterHtml);

            var product = new ProductBasicData();
            var attributes = productNode.Attributes;

            // The same attribute feeds both fields; per the original note, the barcode should be
            // derived from it, its last digits being the product id.
            string productId = attributes.First(a => a.OriginalName == "DdPiD").Value;
            product.Barcode = productId;
            product.ProductId = productId;

            product.pbcatid = attributes.First(a => a.OriginalName == "pbcatid").Value;
            product.qty = attributes.First(a => a.OriginalName == "qty").Value;
            product.iq = attributes.First(a => a.OriginalName == "iq").Value;
            product.inb = attributes.First(a => a.OriginalName == "inb").Value;

            // Image and price are located by their position below the product node.
            var firstImage = productNode.SelectNodes("child::*/child::div/child::img").First();
            product.ImageSource = firstImage.Attributes.First(a => a.Name == "src").Value;
            product.EffectivePrice = productNode.SelectSingleNode("child::*/child::div/child::div/child::div/child::span").InnerText;

            // Description and product name both come from the same link text.
            string linkText = HttpUtility.HtmlDecode(productNode.SelectSingleNode("child::*/child::div/child::div/child::a").InnerText);
            product.Description = linkText;
            product.ProductName = linkText;

            log.DebugFormat("[BuildProductBasicData] fetched product={0}.", product.ToString());
            return product;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Maps the <c>td</c> cells of one table row onto a new <see cref="Nhl_Players_Rtss_Skater"/>.
        /// </summary>
        /// <param name="row">Table row whose direct <c>td</c> children are read by position (1-17).</param>
        /// <param name="nhlSeasonType">Season type stored on the model.</param>
        /// <param name="year">Year stored on the model.</param>
        /// <returns>The populated model; <c>Number</c> is always 0.</returns>
        private static Nhl_Players_Rtss_Skater MapHtmlRowToModel(HtmlNode row, NhlSeasonType nhlSeasonType, int year)
        {
            HtmlNodeCollection cells = row.SelectNodes(@"./td");

            return new Nhl_Players_Rtss_Skater
            {
                NhlSeasonType = nhlSeasonType,
                Year = year,

                // Cell 0 is not read; the number is fixed to zero.
                Number = 0,
                Name = cells[1].InnerText,
                Team = cells[2].InnerText,
                Position = cells[3].InnerText,

                GamesPlayed = ConvertStringToInt(cells[4].InnerText),
                Hits = ConvertStringToInt(cells[5].InnerText),
                BlockedShots = ConvertStringToInt(cells[6].InnerText),
                MissedShots = ConvertStringToInt(cells[7].InnerText),
                Giveaways = ConvertStringToInt(cells[8].InnerText),
                Takeaways = ConvertStringToInt(cells[9].InnerText),
                FaceoffsWon = ConvertStringToInt(cells[10].InnerText),
                FaceoffsLost = ConvertStringToInt(cells[11].InnerText),
                FaceoffsTaken = ConvertStringToInt(cells[12].InnerText),
                FaceoffWinPercentage = Convert.ToDouble(cells[13].InnerText),
                PercentageOfTeamFaceoffsTaken = Convert.ToDouble(cells[14].InnerText),
                Shots = ConvertStringToInt(cells[15].InnerText),
                Goals = ConvertStringToInt(cells[16].InnerText),
                ShootingPercentage = Convert.ToDouble(cells[17].InnerText)
            };
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Resolves the sub-achievements listed as <c>li</c> children of <paramref name="subAchievementNode"/>.
        /// Unknown sub-achievements are created and saved; the points of every sub-achievement are
        /// subtracted from <paramref name="achievement"/>.
        /// </summary>
        /// <param name="achievement">Parent achievement whose points are reduced.</param>
        /// <param name="subAchievementNode">Node whose direct <c>li</c> children describe the sub-achievements.</param>
        /// <returns>The sub-achievements whose tooltip attribute could be parsed.</returns>
        private IList<Achievement> ParseSubAchievements(Achievement achievement, HtmlNode subAchievementNode)
        {
            IList<Achievement> collected = new List<Achievement>();

            HtmlNodeCollection items = subAchievementNode.SelectNodes("./li");
            if (items == null)
            {
                return collected;
            }

            foreach (HtmlNode item in items)
            {
                // The achievement id is embedded in the tooltip handler attribute.
                var tooltipAttribute = item.Attributes["onmousemove"];
                if (tooltipAttribute == null)
                {
                    continue;
                }

                Match tooltip = parseTooltip.Match(tooltipAttribute.Value);
                if (!tooltip.Success)
                {
                    continue;
                }

                // An unparsable id leaves blizzardId at 0.
                int blizzardId;
                int.TryParse(tooltip.Groups["achievementid"].Value, out blizzardId);

                Achievement sub = _service.Find(blizzardId);
                if (sub == null)
                {
                    sub = new Achievement
                    {
                        BlizzardID = blizzardId,
                        Name = GetValueAsString(item, ".//h3"),
                        Description = GetValueAsString(item, ".//div[@class='color-tooltip-yellow']"),
                        Points = GetValueAsInt32(item, ".//span[@class='points border-3']")
                    };
                    _service.Save(sub);
                }

                achievement.Points -= sub.Points;
                collected.Add(sub);
            }

            return collected;
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Selects the nodes matching the rule's XPath under <paramref name="node"/> and runs the
        /// rule's single-node extraction on each of them, appending every result to <c>res</c>.
        /// </summary>
        /// <param name="node">Node to query; when null nothing is selected.</param>
        /// <returns><c>res</c>, extended by one entry per matched node when the rule has an xpathSingle.</returns>
        public List<List<KeyValuePair<string, object>>> run(HtmlNode node)
        {
            Factory.Instance.iInfo(string.Format("Running xpathCollection id : {0}", rule.id));

            // The null check must come before the node is queried.
            if (node == null)
                return res;

            HtmlNodeCollection nodes = new HtmlNodeCollection(node);
            HtmlNodeCollection n2 = node.SelectNodes(rule.xpath);
            if (n2 != null)
            {
                foreach (HtmlNode n in n2)
                    nodes.Add(n);
            }

            //run
            foreach (HtmlNode n in nodes)
            {
                // NOTE(review): last_val is declared inside the loop, so XPathSingle always receives null — confirm intent.
                List<KeyValuePair<string, object>> last_val = null;
                if (rule.xpathSingle != null)
                {
                    XPathSingle xs = new XPathSingle(rule.xpathSingle, last_val);
                    last_val = (List<KeyValuePair<string, object>>)xs.Run(n);
                    res.Add(last_val);
                }
            }
            return res;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Maps one table row of goalie bio/statistics cells onto a new
        /// <c>Nhl_Players_Bio_Goalie</c>. Cells are read by fixed position (0..20).
        /// </summary>
        /// <param name="row">Row node whose direct <c>td</c> children hold the values.</param>
        /// <param name="nhlSeasonType">Season type stored on the model as-is.</param>
        /// <param name="year">Season year stored on the model as-is.</param>
        /// <returns>The populated model.</returns>
        private static Nhl_Players_Bio_Goalie MapHtmlRowToModel(HtmlNode row, NhlSeasonType nhlSeasonType, int year)
        {
            HtmlNodeCollection tdNodes = row.SelectNodes(@"./td");

            // Decimal statistics are parsed culture-independently so the result does not
            // depend on the machine's regional settings (a comma-decimal locale would
            // otherwise misread "2.35").
            // NOTE(review): assumes the page always prints '.' as the decimal separator - confirm.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            Nhl_Players_Bio_Goalie model = new Nhl_Players_Bio_Goalie();

            model.NhlSeasonType = nhlSeasonType;
            model.Year = year;

            // Biographical columns.
            model.Number = ConvertStringToInt(tdNodes[0].InnerText);
            model.Name = tdNodes[1].InnerText;
            model.Team = tdNodes[2].InnerText;
            model.Position = "G";
            // NOTE(review): the date is still parsed with the current culture because the
            // exact source format is not visible here - verify before switching cultures.
            model.DateOfBirth = Convert.ToDateTime(tdNodes[3].InnerText.Replace("'", "/"));
            model.BirthCity = tdNodes[4].InnerText;
            model.StateOrProvince = tdNodes[5].InnerText;
            model.BirthCountry = tdNodes[6].InnerText;
            model.HeightInches = ConvertStringToInt(tdNodes[7].InnerText);
            model.WeightLbs = ConvertStringToInt(tdNodes[8].InnerText);
            model.Catches = tdNodes[9].InnerText;
            model.Rookie = tdNodes[10].InnerText;
            model.DraftYear = ConvertStringToInt(tdNodes[11].InnerText);
            model.DraftRound = ConvertStringToInt(tdNodes[12].InnerText);
            model.DraftOverall = ConvertStringToInt(tdNodes[13].InnerText);

            // Statistics columns.
            model.GamesPlayed = ConvertStringToInt(tdNodes[14].InnerText);
            model.Wins = ConvertStringToInt(tdNodes[15].InnerText);
            model.Losses = ConvertStringToInt(tdNodes[16].InnerText);
            model.OTSOLosses = ConvertStringToInt(tdNodes[17].InnerText);
            model.GAA = Convert.ToDouble(tdNodes[18].InnerText, invariant);
            model.SavePercentage = Convert.ToDouble(tdNodes[19].InnerText, invariant);
            model.Shutouts = ConvertStringToInt(tdNodes[20].InnerText);

            return model;
        }
Ejemplo n.º 27
0
    /// <summary>
    /// Reads a localisation table. The first row that contains <c>td</c> cells is the
    /// language header; every later row is a data row whose first cell is the entry key.
    /// </summary>
    /// <param name="loadLanguages">Receives column index -> header cell text.</param>
    /// <param name="loadEntries">Receives column index -> Hashtable of key -> cell text.</param>
    /// <param name="node">Node under which <c>tr</c> elements are searched.</param>
    void ParseHTMLTable(Hashtable loadLanguages, Hashtable loadEntries, HtmlAgilityPack.HtmlNode node)
    {
        // SelectNodes returns null when nothing matches, so an empty table is a no-op.
        HtmlAgilityPack.HtmlNodeCollection trNodes = node.SelectNodes(".//tr");
        if (trNodes == null)
        {
            return;
        }

        bool isHeaderRow = true;

        foreach (HtmlAgilityPack.HtmlNode trNode in trNodes)
        {
            // Query the cells once per row; rows without td cells are skipped and do not
            // count as the header row.
            HtmlAgilityPack.HtmlNodeCollection tdNodes = trNode.SelectNodes(".//td");
            if (tdNodes == null)
            {
                continue;
            }

            string key = "";

            for (int i = 0; i < tdNodes.Count; i++)
            {
                HtmlAgilityPack.HtmlNode tdNode = tdNodes[i];

                if (isHeaderRow)
                {
                    //Language header
                    if (i == 0)
                    {
                        continue; //Ignore this top-left empty cell
                    }
                    loadLanguages[i] = tdNode.InnerText;
                    if (!loadEntries.ContainsKey(i))
                    {
                        loadEntries[i] = new Hashtable();
                    }
                    continue;
                }

                //Data rows
                if (i == 0)
                {
                    key = tdNode.InnerText;
                    continue;
                }
                if (key == "")
                {
                    continue; //Skip entries with empty keys (the other values can be used as labels)
                }

                Hashtable hTable = (Hashtable)loadEntries[i];
                if (hTable == null)
                {
                    // A data row is wider than the header: there is no table to store into.
                    Debug.LogError("ERROR: No language column " + i + " for key [" + key + "]");
                    continue;
                }

                if (hTable.ContainsKey(key))
                {
                    Debug.LogError("ERROR: Double key [" + key + "]");
                    unresolvedErrors++;
                }
                hTable[key] = tdNode.InnerText;
            }

            isHeaderRow = false;
        }
    }
Ejemplo n.º 28
0
        /// <summary>
        /// Produces the price string for an item price node by combining the title of its
        /// first direct <c>span</c> child with the node's own text.
        /// </summary>
        /// <param name="itemPriceNode">Node holding the price text and span children.</param>
        /// <returns>The value returned by <c>parsePrice</c>.</returns>
        private static string getItemPrice(HtmlNode itemPriceNode)
        {
            HtmlNodeCollection spanNodes = itemPriceNode.SelectNodes("span");

            // Replace the non-breaking-space entity and drop leading whitespace.
            string rawText = itemPriceNode.InnerText;
            string normalizedText = rawText.Replace("&nbsp;", " ").TrimStart();

            string firstSpanTitle = spanNodes[0].Attributes["title"].Value;
            return parsePrice(parsePriceRecursion(firstSpanTitle, normalizedText));
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Returns the inner HTML of the article under the element with id <c>news-story</c>,
        /// after passing the <c>taboola-bottom-main-column</c> elements to <c>RemoveTags</c>
        /// and the article to <c>RemoveScripts</c>.
        /// </summary>
        /// <param name="node">Node used as the XPath context.</param>
        /// <returns>Inner HTML of the article node.</returns>
        public override string ReadContent(HtmlNode node)
        {
            HtmlNodeCollection taboolaBlocks = node.SelectNodes("//*[@id='taboola-bottom-main-column']");
            RemoveTags(taboolaBlocks);

            HtmlNode article = node.SelectSingleNode("//*[@id='news-story']/article");
            RemoveScripts(article);

            return article.InnerHtml;
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Lets the user pick an HTML file, loads it, prints parse error codes, and then
        /// prints the text of every cell of the first table found in the document.
        /// </summary>
        private void buttonParseHtml_Click(object sender, EventArgs e)
        {
            string fileName;

            // The dialog owns native resources, so dispose it once the choice is read.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter      = "HTML File (*.html;)|*.html";
                openFileDialog.Multiselect = false;

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                fileName = openFileDialog.FileName;
            }

            if (String.IsNullOrEmpty(fileName))
            {
                return;
            }

            string strHtml;

            // The using block closes the reader; no explicit Close call is needed.
            using (StreamReader reader = new StreamReader(fileName, Encoding.UTF8))
            {
                strHtml = reader.ReadToEnd();
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(strHtml);                 // load the html
            foreach (var err in doc.ParseErrors)
            {
                Console.WriteLine(err.Code);
            }

            HtmlAgilityPack.HtmlNode rootNode = doc.DocumentNode;                // document root node
#if true
            string xpath = @"//table";
            HtmlAgilityPack.HtmlNode node = rootNode.SelectSingleNode(xpath);                // first table
            if (node == null)
            {
                // The document contains no table.
                return;
            }

            // Strip script and style elements so only table content remains.
            // ToArray snapshots the sequence because Remove mutates the tree.
            foreach (var script in node.Descendants("script").ToArray())
            {
                script.Remove();
            }
            foreach (var style in node.Descendants("style").ToArray())
            {
                style.Remove();
            }

            // SelectNodes returns null when nothing matches (e.g. rows wrapped in tbody).
            var trNodes = node.SelectNodes("tr");
            if (trNodes == null)
            {
                return;
            }

            foreach (var trnod in trNodes)                 // iterate rows
            {
                var tdNodes = trnod.SelectNodes("td");
                if (tdNodes == null)
                {
                    // Row without td cells (for example a th-only header row).
                    continue;
                }
                for (int i = 0; i < tdNodes.Count; i++)                     // iterate columns
                {
                    Console.WriteLine(tdNodes[i].InnerText);
                }
            }
#endif
        }
Ejemplo n.º 31
0
 /// <summary>
 /// Builds a <c>Remark</c> from <paramref name="remarkNode"/>: the optional discovery,
 /// the optional reward item, related quest links and accepting-city links.
 /// </summary>
 /// <param name="remarkNode">Node whose children describe the remark.</param>
 /// <returns>The populated remark.</returns>
 public static Remark ParseRemark(HtmlNode remarkNode)
 {
     var result = new Remark();

     // Discovery: a direct <a> child with a non-empty title.
     var discoveryLink = remarkNode.SelectSingleNode("a[@title!='']");
     if (discoveryLink != null)
     {
         // The level node is two siblings before the link; the node carrying the
         // src attribute is two siblings before the level node.
         var levelNode = discoveryLink.PreviousSibling.PreviousSibling;
         var typeNode = levelNode.PreviousSibling.PreviousSibling;

         string typeName = typeRegex.Match(typeNode.Attributes["src"].Value).Groups["type"].Value;
         result.DiscoveryType = Enum.Parse(typeof(DisType), typeName).ToString();

         string levelDigit = levelNode.InnerText.Substring(0, 1);
         result.DiscoveryLevel = Int32.Parse(levelDigit);

         // The first five characters of the title are dropped before parsing.
         string expText = discoveryLink.Attributes["title"].Value.Remove(0, 5);
         result.DiscoveryExp = Int32.Parse(expText);

         result.Discovery = discoveryLink.InnerText;
     }

     // Reward item.
     var rewardSpan = remarkNode.SelectSingleNode("span[@style='color:#804000;']");
     if (rewardSpan != null)
     {
         result.AwardItem = rewardSpan.InnerText;
     }

     // Related quests.
     var questLinks = remarkNode.SelectNodes("descendant::a[@style='color:#C000C0;' or @style='color:DarkBlue;']");
     if (questLinks != null)
     {
         foreach (HtmlNode questLink in questLinks)
         {
             IList<int> targetIds;
             IList<string> targetNames = null;

             // Links whose text starts with this prefix go to the "pre" lists,
             // everything else to the follow-up quest list.
             if (questLink.InnerText.StartsWith("前:"))
             {
                 targetNames = result.PreFoundName;
                 targetIds = result.PreQuestID;
             }
             else
             {
                 targetIds = result.FollowQuestID;
             }

             var hrefMatch = questRegex.Match(questLink.Attributes["href"].Value);

             if (questLink.InnerText.StartsWith("前:港口-"))
             {
                 // Stored by name (prefix removed) instead of by quest id.
                 targetNames.Add(questLink.InnerText.Replace("前:港口-",""));
             }
             else
             {
                 targetIds.Add(Int32.Parse(hrefMatch.Groups["id"].Value));
             }
         }
     }

     // Accepting cities.
     var cityLinks = remarkNode.SelectNodes("descendant::a[@class='MisCity']");
     if (cityLinks != null)
     {
         foreach (HtmlNode cityLink in cityLinks)
         {
             // These four labels are not added to the city list.
             bool excluded = cityLink.InnerText == "南美开拓港" || cityLink.InnerText == "东南亚开拓港" ||
                             cityLink.InnerText == "掠夺地图" || cityLink.InnerText == "沉船资讯";
             if (!excluded)
             {
                 result.FromCityList.Add(cityLink.InnerText);
             }
         }
     }

     return result;
 }
Ejemplo n.º 32
0
        /// <summary>
        /// Finds the first <c>div</c> whose class attribute contains "body", passes it to
        /// <c>RemoveScripts</c> and returns its inner HTML run through <c>CleanHtml</c>.
        /// </summary>
        /// <param name="node">Node used as the XPath context.</param>
        /// <returns>The cleaned inner HTML of the matched div.</returns>
        public override string ReadContent(HtmlNode node)
        {
            var allDivs = node.SelectNodes("//div");

            var bodyDiv = allDivs.FirstOrDefault(div =>
            {
                if (!div.Attributes.Contains("class"))
                {
                    return false;
                }
                return div.Attributes["class"].Value.Contains("body");
            });

            RemoveScripts(bodyDiv);

            string html = bodyDiv.InnerHtml;
            return CleanHtml(html);
        }
Ejemplo n.º 33
0
 /// <summary>
 /// Concatenates the text of every direct <c>tr</c> child of <paramref name="table"/>,
 /// appending ';' after each row.
 /// </summary>
 /// <param name="table">Table node whose rows are read.</param>
 /// <returns>The joined row text, or an empty string when the table has no rows.</returns>
 public static string getTable(HtmlNode table)
 {
     // SelectNodes returns null rather than an empty collection when nothing matches.
     var rows = table.SelectNodes("tr");
     if (rows == null)
     {
         return "";
     }

     // A builder avoids re-copying the accumulated string for every row.
     var data = new System.Text.StringBuilder();
     foreach (HtmlNode row in rows)
     {
         data.Append(row.InnerText).Append(';');
     }
     return data.ToString();
 }
Ejemplo n.º 34
0
 /// <summary>
 /// Selects the nodes under <paramref name="xmlElement"/> that match this instance's
 /// <c>xpath</c>.
 /// </summary>
 /// <param name="xmlElement">Context node for the query.</param>
 /// <returns>The matching nodes, or an empty list when the query yields null.</returns>
 public IEnumerable<HtmlNode> FindWithin(HtmlNode xmlElement)
 {
     IEnumerable<HtmlNode> matches = xmlElement.SelectNodes(xpath);
     return matches ?? new List<HtmlNode>();
 }
Ejemplo n.º 35
0
 /// <summary>
 /// Converts the rows of <paramref name="table"/> after the first into a dictionary,
 /// keyed by the parsed text of each row's first child node and valued by the parsed
 /// text of its second child node.
 /// </summary>
 /// <param name="table">Table node whose direct <c>tr</c> children are read.</param>
 /// <returns>Dictionary built from the parsed row values.</returns>
 Dictionary<decimal, decimal> ParseOrderDepthFromResult(HtmlNode table)
 {
     // position() > 1 skips the header row.
     var dataRows = table.SelectNodes("tr[position() > 1]");

     return dataRows.ToDictionary(
         row => Decimal.Parse(row.ChildNodes[0].InnerText),
         row => Decimal.Parse(row.ChildNodes[1].InnerText));
 }
Ejemplo n.º 36
0
        /// <summary>
        /// Selects the nodes matching <paramref name="xPath"/> under <paramref name="parent"/>.
        /// </summary>
        /// <param name="parent">Context node for the query.</param>
        /// <param name="xPath">XPath expression to evaluate.</param>
        /// <param name="doThrow">When true, a null query result raises an exception.</param>
        /// <returns>The matches, or a new empty collection when the query result is null and <paramref name="doThrow"/> is false.</returns>
        /// <exception cref="NewsParserException">The query result is null and <paramref name="doThrow"/> is true.</exception>
        public static HAP.HtmlNodeCollection SelectCollection(this HAP.HtmlNode parent, string xPath, bool doThrow)
        {
            HAP.HtmlNodeCollection matches = parent.SelectNodes(xPath);
            if (matches != null)
            {
                return matches;
            }

            if (doThrow)
            {
                throw new NewsParserException(new { xPath });
            }

            return new HAP.HtmlNodeCollection(parent);
        }
		/// <summary>
		/// Reads the first three direct <c>td</c> children of <paramref name="commandNode"/>
		/// as command, target and value, and forwards them to the three-string overload.
		/// </summary>
		/// <param name="commandNode">Row node holding the three cells.</param>
		/// <returns>The result of the three-string <c>Translate</c> overload.</returns>
		public string Translate (HtmlNode commandNode)
		{
			HtmlNodeCollection cells = commandNode.SelectNodes ("td");

			return Translate (cells[0].InnerText, cells[1].InnerText, cells[2].InnerText);
		}
Ejemplo n.º 38
0
        /// <summary>
        /// Creates the form element and collects its descendant <c>input</c> and
        /// <c>select</c> nodes into <c>InputElements</c> and <c>SelectElements</c>, each
        /// wrapped together with its zero-based position among the matches.
        /// </summary>
        /// <param name="Node">The form node.</param>
        /// <param name="NodeIndex">Index passed through to the base constructor.</param>
        public FormElement(HtmlAgilityPack.HtmlNode Node, int NodeIndex) : base(Node, NodeIndex)
        {
            HtmlNodeCollection inputNodes = Node.SelectNodes(".//input");
            if (inputNodes != null)
            {
                int position = 0;
                foreach (var inputNode in inputNodes)
                {
                    InputElements.Add(new InputElement(inputNode, position));
                    position++;
                }
            }

            HtmlNodeCollection selectNodes = Node.SelectNodes(".//select");
            if (selectNodes != null)
            {
                int position = 0;
                foreach (var selectNode in selectNodes)
                {
                    SelectElements.Add(new Element(selectNode, position));
                    position++;
                }
            }
        }
Ejemplo n.º 39
0
        /// <summary>
        /// Background-worker handler: downloads the page, finds the <c>ul</c> with class
        /// <c>ah_l</c> and adds the text of each <c>span</c> with class <c>ah_k</c> inside
        /// it to <c>result</c>. Adds nothing when either element is absent.
        /// </summary>
        private void BwLoad_DoWork(object sender, DoWorkEventArgs e)
        {
            HtmlAgilityPack.HtmlWeb      web     = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load("https://www.naver.com");

            HtmlAgilityPack.HtmlNode listNode = htmlDoc.DocumentNode.SelectSingleNode("//ul[@class='ah_l']");
            if (listNode == null)
            {
                // The list is not on the page (markup changed or a different page was served).
                return;
            }

            // SelectNodes returns null, not an empty collection, when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection keywordNodes = listNode.SelectNodes(".//span[@class='ah_k']");
            if (keywordNodes == null)
            {
                return;
            }

            foreach (HtmlNode n in keywordNodes)
            {
                this.result.Add(n.InnerText);
            }
        }
Ejemplo n.º 40
0
        /// <summary>
        /// Returns the text of the <c>h4</c> whose class and id are both
        /// <c>video_top_more</c>, with CR, LF and TAB characters removed.
        /// </summary>
        /// <param name="documentNode">Node used as the XPath context.</param>
        /// <returns>The description, or an empty string when no such heading exists.</returns>
        private static string GetDescription(HtmlAgilityPack.HtmlNode documentNode)
        {
            // SelectNodes returns null when the document has no h4 at all; passing that
            // to SingleOrDefault would throw ArgumentNullException.
            var headings = documentNode.SelectNodes("//h4");
            if (headings == null)
            {
                return string.Empty;
            }

            var node = headings.SingleOrDefault(
                s =>
                s.Attributes["class"] != null && s.Attributes["class"].Value.Equals("video_top_more") &&
                s.Attributes["id"] != null && s.Attributes["id"].Value.Equals("video_top_more"));

            if (node == null)
            {
                return string.Empty;
            }

            return node.InnerText.Replace("\r", "").Replace("\n", "").Replace("\t", "");
        }
Ejemplo n.º 41
0
        /// <summary>
        /// Returns the <c>content</c> attribute of the <c>meta</c> element whose
        /// <c>property</c> is <c>og:url</c> and whose <c>itemprop</c> is <c>url</c>.
        /// </summary>
        /// <param name="documentNode">Node used as the XPath context.</param>
        /// <returns>The attribute value, or an empty string when the element or its content attribute is missing.</returns>
        private static string GetLinkOrigin(HtmlAgilityPack.HtmlNode documentNode)
        {
            // SelectNodes returns null when the document has no meta element; passing
            // that to SingleOrDefault would throw ArgumentNullException.
            var metas = documentNode.SelectNodes("//meta");
            if (metas == null)
            {
                return string.Empty;
            }

            var node = metas.SingleOrDefault(
                a => a.Attributes["property"] != null && a.Attributes["property"].Value.Equals("og:url") &&
                a.Attributes["itemprop"] != null && a.Attributes["itemprop"].Value.Equals("url"));

            if (node == null)
            {
                return string.Empty;
            }

            // The attribute indexer yields null for a missing attribute.
            var contentAttribute = node.Attributes["content"];
            return contentAttribute != null ? contentAttribute.Value : string.Empty;
        }
Ejemplo n.º 42
0
        /// <summary>
        /// Builds a <c>VideoInfo</c> for every <c>article</c> under <paramref name="node"/>
        /// and updates <c>nextPageAvailable</c> / <c>nextPageUrl</c> from the "More " link.
        /// </summary>
        /// <param name="node">Node that contains the articles and the paging link.</param>
        /// <param name="parentUrl">Base URL used to resolve relative links.</param>
        /// <returns>The videos found; empty when there are no articles.</returns>
        private List <VideoInfo> GetVids(HtmlNode node, string parentUrl)
        {
            List <VideoInfo> videos = new List <VideoInfo>();

            // SelectNodes returns null when nothing matches; paging is still evaluated below.
            var vids = node.SelectNodes(".//article");
            if (vids != null)
            {
                foreach (var vid in vids)
                {
                    VideoInfo video = new VideoInfo();

                    // Look the heading up once; its presence selects the layout.
                    var heading = vid.SelectSingleNode(".//h2[contains(@class,'h3')]");
                    if (heading == null)
                    {
                        video.Title    = HttpUtility.HtmlDecode(vid.SelectSingleNode(".//a[@title]").Attributes["title"].Value.Trim());
                        video.VideoUrl = FormatDecodeAbsolutifyUrl(parentUrl, vid.SelectSingleNode(".//a[@href]").Attributes["href"].Value, null, UrlDecoding.None);
                    }
                    else
                    {
                        video.Title    = heading.InnerText.Trim();
                        video.VideoUrl = FormatDecodeAbsolutifyUrl(parentUrl, vid.SelectSingleNode(".//a[@class='teaser__link' and @href]").Attributes["href"].Value, null, UrlDecoding.None);

                        // Prefer the description paragraph, fall back to the subtitle;
                        // leave the description unset when neither is present.
                        var descriptionNode = vid.SelectSingleNode(".//p[contains(@class,'teaser__description')]")
                                              ?? vid.SelectSingleNode(".//h3[contains(@class,'teaser__subtitle')]");
                        if (descriptionNode != null)
                        {
                            video.Description = descriptionNode.InnerText.Trim();
                        }
                    }

                    var moNode = vid.SelectSingleNode(".//span[@data-month]");
                    var daNode = vid.SelectSingleNode(".//span[@data-date]");
                    if (moNode != null && daNode != null)
                    {
                        video.Airdate = moNode.InnerText.Trim() + ' ' + daNode.InnerText.Trim();
                    }

                    video.Thumb = getThumb(vid.SelectSingleNode(".//picture/img"));
                    videos.Add(video);
                }
            }

            var np = node.SelectSingleNode(".//a[@href and contains(text(),'More ')]");

            nextPageAvailable = false;
            if (np != null)
            {
                nextPageAvailable = true;
                nextPageUrl       = CreateUrl(parentUrl, np.Attributes["href"].Value);
            }
            return(videos);
        }
Ejemplo n.º 43
0
        /// <summary>
        /// Walks from the navigator's current node up towards a boundary node, collecting
        /// the text returned by <c>GetTextFromSiblings</c> at each level, and returns the
        /// length of the collected, trimmed text.
        /// </summary>
        /// <param name="settings">Optional settings; a string <c>_startingXPath</c> entry re-targets the boundary node.</param>
        /// <param name="nodeNavigator">Navigator whose current node is the starting point.</param>
        /// <param name="logicalParents">Parent chain; the second-to-last entry is the default boundary.</param>
        /// <returns>The text length as a boxed int; 0 when <c>_startingXPath</c> matches nothing.</returns>
        public object Transform(Dictionary <string, object> settings, HtmlNodeNavigator nodeNavigator, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            var collected     = new StringBuilder();
            var reachedParent = false;
            var cursor        = nodeNavigator?.CurrentNode;

            if (logicalParents != null && logicalParents.Count >= 2)
            {
                // The immediate parent is the list itself, so the boundary is the grandparent.
                HtmlAgilityPack.HtmlNode boundary = logicalParents[logicalParents.Count - 2];

                if (settings != null && settings.ContainsKey("_startingXPath"))
                {
                    var xpathSetting = (JValue)settings["_startingXPath"];
                    if (xpathSetting.Type == JTokenType.String)
                    {
                        var candidates = boundary.SelectNodes(xpathSetting.ToObject <string>());
                        if (candidates == null || candidates.Count == 0)
                        {
                            return(0);
                        }

                        boundary = candidates[0];
                    }
                }

                // Climb until the boundary is reached, the tree ends, or the helper
                // reports that the parent was found.
                for (; cursor != null && cursor != boundary && !reachedParent; cursor = cursor.ParentNode)
                {
                    var levelText = this.GetTextFromSiblings(cursor, boundary, ref reachedParent);
                    if (string.IsNullOrEmpty(levelText))
                    {
                        continue;
                    }

                    collected.Append(levelText);
                    collected.Append(" ");
                }
            }

            return(collected.ToString().Trim().Length);
        }
Ejemplo n.º 44
0
        /// <summary>
        /// Resets the paging state, then looks for an anchor whose class contains
        /// <c>svtoa-button</c> and whose text contains "Visa fler". When found, stores its
        /// URL- and HTML-decoded href in <c>nextPageUrl</c> and sets <c>HasNextPage</c>.
        /// </summary>
        /// <param name="node">Node used as the XPath context.</param>
        private void GetNextPageVideosUrl(HtmlAgilityPack.HtmlNode node)
        {
            HasNextPage = false;
            nextPageUrl = "";

            var buttons = node.SelectNodes("//a[contains(@class, 'svtoa-button')]");
            if (buttons == null)
            {
                return;
            }

            var moreButton = buttons.FirstOrDefault(button => (button.InnerText ?? "").Contains("Visa fler"));
            if (moreButton == null)
            {
                return;
            }

            nextPageUrl = moreButton.GetAttributeValue("href", "");
            nextPageUrl = HttpUtility.UrlDecode(nextPageUrl);
            // Some urls come html encoded
            nextPageUrl = HttpUtility.HtmlDecode(nextPageUrl);
            HasNextPage = true;
        }
Ejemplo n.º 45
0
        /// <summary>
        /// Collects the text of every <c>a</c> with class <c>eachTag_video</c> inside the
        /// first <c>div</c> with class <c>tag_video</c>.
        /// </summary>
        /// <param name="documentNode">Node used as the XPath context.</param>
        /// <returns>The tag texts; empty when the container or its anchors are missing.</returns>
        private static List <string> GetTags(HtmlAgilityPack.HtmlNode documentNode)
        {
            List <string> tags = new List <string>();

            // SelectNodes returns null when nothing matches; handing that to LINQ would
            // throw ArgumentNullException, so each query result is checked first.
            var divs = documentNode.SelectNodes("//div");
            if (divs == null)
            {
                return(tags);
            }

            var container = divs.FirstOrDefault(c => c.Attributes["class"] != null && c.Attributes["class"].Value.Equals("tag_video"));
            if (container == null)
            {
                return(tags);
            }

            var anchors = container.SelectNodes(".//a");
            if (anchors == null)
            {
                return(tags);
            }

            tags.AddRange(
                anchors
                .Where(a => a.Attributes["class"] != null && a.Attributes["class"].Value.Equals("eachTag_video"))
                .Select(a => a.InnerText));

            return(tags);
        }
Ejemplo n.º 46
0
    /// <summary>
    /// Appends one <c>List&lt;object&gt;</c> of cell texts to <paramref name="loadData"/>
    /// for every <c>tr</c> under <paramref name="node"/> that contains <c>td</c> cells.
    /// </summary>
    /// <param name="loadData">List that receives the per-row cell lists.</param>
    /// <param name="node">Node under which rows are searched.</param>
    void ParseHTMLTable(ref List <object> loadData, HtmlAgilityPack.HtmlNode node)
    {
        // SelectNodes returns null when nothing matches, so a table without rows adds nothing.
        HtmlAgilityPack.HtmlNodeCollection trNodes = node.SelectNodes(".//tr");
        if (trNodes == null)
        {
            return;
        }

        foreach (HtmlAgilityPack.HtmlNode trNode in trNodes)
        {
            // Query the cells once and reuse the result for both the check and the loop.
            HtmlAgilityPack.HtmlNodeCollection tdNodes = trNode.SelectNodes(".//td");
            if (tdNodes == null)
            {
                continue;
            }

            List <object> rowData = new List <object>(tdNodes.Count);
            foreach (HtmlAgilityPack.HtmlNode tdNode in tdNodes)
            {
                rowData.Add(tdNode.InnerText);
            }
            loadData.Add(rowData);
        }
    }
Ejemplo n.º 47
0
        /// <summary>
        /// Returns the text of <paramref name="psalmText"/>. When that text contains the
        /// word "span", the inner HTML of every descendant <c>span</c> without a style
        /// attribute value is appended to it.
        /// </summary>
        /// <param name="psalmText">Node to read; may be null.</param>
        /// <returns>The assembled text, or an empty string for a null node.</returns>
        private static string ParsePsalm(HtmlAgilityPack.HtmlNode psalmText)
        {
            if (psalmText == null)
            {
                return(string.Empty);
            }

            var resT = psalmText.InnerText;

            // NOTE(review): this tests the tag-stripped text for the literal word "span";
            // it looks like InnerHtml may have been intended - confirm before changing.
            if (!psalmText.InnerText.Contains("span"))
            {
                return(resT);
            }

            // SelectNodes returns null when there are no span elements, which is possible
            // here because the check above looks at text, not markup.
            var spans = psalmText.SelectNodes(".//span");
            if (spans == null)
            {
                return(resT);
            }

            foreach (HtmlNode span in spans)
            {
                string attributeValue = span.GetAttributeValue("style", "");
                if (string.IsNullOrEmpty(attributeValue))
                {
                    resT += span.InnerHtml;
                }
            }

            return(resT);
        }
Ejemplo n.º 48
0
 /// <summary>
 /// Null-safe wrapper around <c>SelectNodes</c>: evaluates the compiled XPath expression
 /// against the node and never hands back null.
 /// </summary>
 /// <param name="node">The context node.</param>
 /// <param name="xpath">The compiled XPath expression.</param>
 /// <returns>The matching nodes, or a new empty collection owned by <paramref name="node"/> when the query yields null.</returns>
 public static HtmlNodeCollection SelectSafeNodes(this HtmlNode node, XPathExpression xpath)
 {
     HtmlNodeCollection matches = node.SelectNodes(xpath);
     if (matches == null)
     {
         matches = new HtmlNodeCollection(node);
     }
     return matches;
 }
Ejemplo n.º 49
0
        /// <summary>
        /// Parses the schedule table of the page into a <see cref="DataTable"/>.
        /// Columns: coach, car type, one column per header cell found, then coach number.
        /// </summary>
        /// <param name="content">Parsed HTML document.</param>
        /// <param name="istech">Selects which outer table row (3 when true, 2 when false) holds the schedule table.</param>
        /// <returns>The table; it has the fixed columns and no rows when the expected markup is absent.</returns>
        private DataTable GetDataTable(Html.HtmlDocument content, bool istech)
        {
            DataTable dt = new DataTable();

            Html.HtmlNode table = content.DocumentNode.SelectSingleNode("/html/body/table/tr[2]/td/table/tr/td[2]/table[2]/tr[" + (istech ? "3" : "2") + "]/td/div/div/table");

            // Columns
            dt.Columns.Add(new DataColumn("教练员")); // coach
            dt.Columns.Add(new DataColumn("车型"));  // car type

            // SelectSingleNode / SelectNodes return null when nothing matches, so every
            // lookup is checked before it is enumerated.
            Html.HtmlNodeCollection trTime = table != null ? table.SelectNodes("./tr[1]/th[1]/font/b/th") : null;
            if (trTime != null)
            {
                foreach (Html.HtmlNode th in trTime)
                {
                    dt.Columns.Add(new DataColumn(th.InnerText));
                }
            }
            dt.Columns.Add(new DataColumn("教练号")); // coach number

            // Data area: every row after the header row.
            Html.HtmlNodeCollection trs = table != null ? table.SelectNodes("./tr[position()>1]") : null;
            if (trs == null)
            {
                return(dt);
            }

            foreach (Html.HtmlNode tr in trs)
            {
                Html.HtmlNodeCollection tds = tr.SelectNodes("./td");
                if (tds == null)
                {
                    // Row without td cells: nothing to record.
                    continue;
                }

                DataRow dr    = dt.NewRow();
                int     index = 0;
                foreach (Html.HtmlNode td in tds)
                {
                    if (index == 0)
                    {
                        // The first cell fills two columns: the first text node of the
                        // span, and the nested span.
                        string name = td.SelectSingleNode("./font/span/child::text()[position()=1]").InnerText;
                        string car  = td.SelectSingleNode("./font/span/span").InnerText;
                        dr[index] = name;
                        index++;
                        dr[index] = car;
                    }
                    else if (td.InnerText == "可预约")
                    {
                        // Cells with this text are stored as a fixed marker value.
                        dr[index] = "yunyue";
                    }
                    else if (istech && index == 2)
                    {
                        dr[index] = td.SelectSingleNode("./font/span").InnerText;
                    }
                    else
                    {
                        dr[index] = td.InnerText;
                    }

                    index++;
                }
                dt.Rows.Add(dr);
            }
            return(dt);
        }
Ejemplo n.º 50
0
        public static ViewCounterModel ViewCounter(string data, ProgressDialogController progressController)
        {
            HtmlNode td;
            string   text;

            ViewCounterModel result = new ViewCounterModel();

            //data = System.IO.File.ReadAllText("data");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            if (doc == null)
            {
                return(null);
            }
            doc.LoadHtml(data);
            if (doc.DocumentNode == null && doc.DocumentNode.ChildNodes == null)
            {
                return(null);
            }

            if (doc.DocumentNode.ChildNodes.Count > 1)
            {
                #region  азбор секции с информацией о канале связи

                HtmlAgilityPack.HtmlNode jqTabsDevices = doc.DocumentNode.SelectNodes("//div[@id='jqTabsDevices']").Single();
                if (jqTabsDevices == null)
                {
                    return(null);
                }
                HtmlNodeCollection info = jqTabsDevices.SelectNodes("div[2]/table/tbody/tr");
                if (info == null)
                {
                    return(null);
                }

                // Наименование точки учета
                td   = info[0].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.AccountPoint = text;

                // Тип счетчика
                td   = info[1].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.CounterType = text;

                // Заводской номер
                td   = info[2].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.CounterNumber = text;

                // Сетевой адрес
                td   = info[3].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.CounterNetworkAddress = text;

                // Коэффициент трансформации
                td         = info[5].SelectNodes("td[2]").Single();
                text       = td.InnerText;
                result.Ktt = text;

                // Производитель
                td   = info[6].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.CounterManufacturer = text;

                // Тип учёта
                td   = info[7].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AccountType = text;

                // Полное название абонента
                td   = info[8].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AbonentFullName = text;
                // Название абонента
                td   = info[9].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AbonentName = text;
                // Короткое название абонента
                td   = info[10].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AbonentShortName = text;

                // Подстанция
                td   = info[11].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.Substation = text;

                // Название объекта
                td   = info[12].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.ObjectName = text;

                // Название точки учета
                td   = info[13].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.AccountPointName = text;

                // Номер ТП
                td        = info[14].SelectNodes("td[2]").Single();
                text      = td.InnerText;
                result.TP = text;

                // Адрес объекта
                td   = info[15].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.ObjectAddress = text;

                // Населенный пункт объекта
                td   = info[16].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.ObjectState = text;

                // Адрес абонента
                td   = info[17].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.AbonentAddress = text;

                // Фидер
                td           = info[18].SelectNodes("td[2]").Single();
                text         = td.InnerText;
                result.Fider = text;

                // Номер договора
                td               = info[19].SelectNodes("td[2]").Single();
                text             = td.InnerText;
                result.DogNumber = text;

                // Родительский лиц счет
                td   = info[20].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AmperParentPointId = text;

                // РЭС
                td   = info[21].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.Departament = text;

                // Зав. номер из расч системы
                td   = info[22].SelectNodes("td[2]").Single();
                text = td.InnerText;
                result.AmperCounterNumber = text;

                // Лицевой счет
                td   = info[23].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                result.AmperPointId = text;

                // Текущий статус
                td            = info[24].SelectNodes("td[2]").Single();
                text          = td.InnerText;
                result.Status = text;

                // Последний сеанс
                td   = info[25].SelectNodes("td[2]").Single();
                text = td.InnerText.Trim();
                DateTime date = new DateTime();
                result.LastSessionDate = DateTime.TryParse(
                    text,
                    System.Globalization.CultureInfo.CreateSpecificCulture("en-US"),
                    System.Globalization.DateTimeStyles.None,
                    out date) ? date : date;

                #endregion

                #region  азбор секции с показаниями

                HtmlAgilityPack.HtmlNode jqTabsSingleMeterIndications = doc.DocumentNode.SelectNodes("//div[@id='jqTabsSingleMeterIndications']").Single();
                info = jqTabsSingleMeterIndications.SelectNodes("table/tbody/tr/td");
                if (info == null)
                {
                    return(null);
                }


                IndicationViewItem ivi = new IndicationViewItem();
                ivi.PreviousIndications = new Indications();
                ivi.NextIndications     = new Indications();

                #region Парсинг

                int startIndex = 0;

                // точка
                td   = info[startIndex++];
                text = td.InnerText;
                ivi.AccountingPoint = text;

                // тип
                td              = info[startIndex++];
                text            = td.InnerText;
                ivi.CounterType = text;

                // предыдущие показания T0
                td = info[startIndex++];
                ivi.PreviousIndications.Tarriff0 = GetIndication(td.InnerText);
                // предыдущие показания T1
                td = info[startIndex++];
                ivi.PreviousIndications.Tarriff1 = GetIndication(td.InnerText);
                // предыдущие показания T2
                td = info[startIndex++];
                ivi.PreviousIndications.Tarriff2 = GetIndication(td.InnerText);
                // предыдущие показания T3
                td = info[startIndex++];
                ivi.PreviousIndications.Tarriff3 = GetIndication(td.InnerText);
                // предыдущие показания T4
                td = info[startIndex++];
                ivi.PreviousIndications.Tarriff4 = GetIndication(td.InnerText);
                // предыдущие показания достоверность
                td   = info[startIndex++];
                text = td.InnerText;
                ivi.PreviousIndications.DataReliability = text;

                // текущие показания T0
                td = info[startIndex++];
                ivi.NextIndications.Tarriff0 = GetIndication(td.InnerText);
                // текущие показания T1
                td = info[startIndex++];
                ivi.NextIndications.Tarriff1 = GetIndication(td.InnerText);
                // текущие показания T2
                td = info[startIndex++];
                ivi.NextIndications.Tarriff2 = GetIndication(td.InnerText);
                // текущие показания T3
                td = info[startIndex++];
                ivi.NextIndications.Tarriff3 = GetIndication(td.InnerText);
                // текущие показания T4
                td = info[startIndex++];
                ivi.NextIndications.Tarriff4 = GetIndication(td.InnerText);
                // предыдущие показания достоверность
                td   = info[startIndex++];
                text = td.InnerText;
                ivi.NextIndications.DataReliability = text;

                // разница
                td             = info[startIndex++];
                ivi.Difference = GetIndication(td.InnerText);

                #endregion

                result.IndicationViewItem = ivi;

                #endregion
            }
            return(result);
        }
Ejemplo n.º 51
0
        /// <summary>
        /// Parses the HTML of a device page into a <see cref="ViewDeviceModel"/>: the communication
        /// session details, the per-counter indication rows and the per-month quality tables.
        /// </summary>
        /// <param name="data">HTML markup of the device page.</param>
        /// <param name="progressController">Not used by this method.</param>
        /// <returns>
        /// The populated model; a freshly constructed, unpopulated model when the document has at most one
        /// top-level node; <c>null</c> when a required section is missing or when any exception is
        /// raised while parsing (the exception is passed to <c>_logger</c>).
        /// </returns>
        public static ViewDeviceModel ViewDevice(string data, ProgressDialogController progressController)
        {
            ViewDeviceModel result = new ViewDeviceModel();

            try
            {
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(data);

                // Either reference being null makes the document unusable. The original joined the two
                // tests with '&&', which dereferenced DocumentNode exactly when it was null.
                if (doc.DocumentNode == null || doc.DocumentNode.ChildNodes == null)
                {
                    return(null);
                }

                if (doc.DocumentNode.ChildNodes.Count > 1)
                {
                    #region Communication channel section

                    // SelectSingleNode yields null when nothing matches, so the guard below is reachable
                    // (SelectNodes(...).Single() threw before the null check could run).
                    HtmlAgilityPack.HtmlNode jqTabsDevices = doc.DocumentNode.SelectSingleNode("//div[@id='jqTabsDevices']");
                    if (jqTabsDevices == null)
                    {
                        return(null);
                    }
                    HtmlNodeCollection sessionInformation = jqTabsDevices.SelectNodes("div[2]/div/table/tbody/tr");
                    if (sessionInformation == null)
                    {
                        return(null);
                    }

                    result.Session = new SessionInformation();

                    // Each value sits in the second cell of a fixed row; a missing row or cell raises an
                    // exception that the catch block below turns into a null result.

                    // Modem manufacturer
                    result.Session.ModemManufacturer = sessionInformation[2].SelectNodes("td[2]").Single().InnerText;

                    // Device model
                    result.Session.Model = sessionInformation[3].SelectNodes("td[2]").Single().InnerText;

                    // Description
                    result.Session.Description = sessionInformation[4].SelectNodes("td[2]").Single().InnerText;

                    // Current status
                    result.Session.CurrentStatus = sessionInformation[6].SelectNodes("td[2]").Single().InnerText;

                    // Last session; TryParse leaves the default DateTime value in place when the text is not a date.
                    DateTime lastSession;
                    DateTime.TryParse(sessionInformation[7].SelectNodes("td[2]").Single().InnerText, out lastSession);
                    result.Session.LastSessionDate = lastSession;

                    #endregion

                    #region Indications section

                    HtmlAgilityPack.HtmlNode jqTabsBalances = doc.DocumentNode.SelectSingleNode("//div[@id='jqTabsBalances']");
                    if (jqTabsBalances == null)
                    {
                        return(null);
                    }
                    HtmlNodeCollection counters = jqTabsBalances.SelectNodes("table/tbody/tr");
                    if (counters == null)
                    {
                        return(null);
                    }

                    result.CountersIndications = new List <IndicationViewItem>();

                    for (int i = 0; i < counters.Count; i++)
                    {
                        HtmlNodeCollection cells = counters[i].SelectNodes("td");
                        int cell = 0;

                        IndicationViewItem ivi = new IndicationViewItem();
                        ivi.PreviousIndications = new Indications();
                        ivi.NextIndications     = new Indications();

                        #region Row parsing

                        // The cells are consumed strictly left to right: point, type, six cells of
                        // previous indications, six cells of current indications, difference.

                        // Accounting point
                        ivi.AccountingPoint = cells[cell++].InnerText;

                        // Counter type
                        ivi.CounterType = cells[cell++].InnerText;

                        // Previous indications: tariffs T0..T4, then reliability
                        ivi.PreviousIndications.Tarriff0        = GetIndication(cells[cell++].InnerText);
                        ivi.PreviousIndications.Tarriff1        = GetIndication(cells[cell++].InnerText);
                        ivi.PreviousIndications.Tarriff2        = GetIndication(cells[cell++].InnerText);
                        ivi.PreviousIndications.Tarriff3        = GetIndication(cells[cell++].InnerText);
                        ivi.PreviousIndications.Tarriff4        = GetIndication(cells[cell++].InnerText);
                        ivi.PreviousIndications.DataReliability = cells[cell++].InnerText;

                        // Current indications: tariffs T0..T4, then reliability
                        ivi.NextIndications.Tarriff0        = GetIndication(cells[cell++].InnerText);
                        ivi.NextIndications.Tarriff1        = GetIndication(cells[cell++].InnerText);
                        ivi.NextIndications.Tarriff2        = GetIndication(cells[cell++].InnerText);
                        ivi.NextIndications.Tarriff3        = GetIndication(cells[cell++].InnerText);
                        ivi.NextIndications.Tarriff4        = GetIndication(cells[cell++].InnerText);
                        ivi.NextIndications.DataReliability = cells[cell++].InnerText;

                        // Difference
                        ivi.Difference = GetIndication(cells[cell++].InnerText);

                        #endregion

                        result.CountersIndications.Add(ivi);
                    }
                    #endregion

                    #region Indication quality

                    if (result.QualityIndications == null)
                    {
                        result.QualityIndications = new List <QualityIndications>();
                    }

                    HtmlNodeCollection indicationsQualityMonths = doc.DocumentNode.SelectNodes("//table[contains(@class,'tableQualityIndications')]");

                    if (indicationsQualityMonths != null)
                    {
                        foreach (HtmlNode monthTable in indicationsQualityMonths)
                        {
                            QualityIndications qi = new QualityIndications();

                            // A table without the period header cell gets the "???" placeholder; with
                            // SelectNodes(...).Single() that fallback could never be reached.
                            HtmlNode periodCell = monthTable.SelectSingleNode("thead/tr[1]/th[2]");
                            qi.Period     = periodCell == null ? "???" : periodCell.InnerText;
                            qi.PointsData = ParseMonthQualityIndications(monthTable.SelectNodes("tbody/tr"));

                            result.QualityIndications.Add(qi);
                        }
                    }

                    #endregion
                }
            }
            catch (Exception ex)
            {
                // Any parsing failure is logged (when a logger is set) and reported to the caller as null.
                _logger?.Error(ex);
                return(null);
            }

            return(result);
        }
Ejemplo n.º 52
0
        /// <summary>
        /// Downloads listing pages 0..99 of the adayroi.com food category, extracts the identifiers,
        /// name, price and image URL of every product tile, and appends one " $ "-separated line per
        /// product to F:\DATA.txt. Any exception stops the crawl: its message is printed and the
        /// method waits for a console line before returning.
        /// </summary>
        static void eventCrawler()
        {
            try
            {
                // Listing URL; the page index is appended to it.
                string _url = "https://www.adayroi.com/thuc-pham-r591?p=";

                // One client is enough for every page request.
                HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
                htmlWeb.UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0";

                for (int i = 0; i < 100; i++)
                {
                    HtmlAgilityPack.HtmlDocument htmlDoc = htmlWeb.Load(_url + i);

                    HtmlAgilityPack.HtmlNode _nodThreads = htmlDoc.DocumentNode.SelectSingleNode(@"//div[@class='row body-list-item']");
                    if (_nodThreads == null)
                    {
                        // Same outcome as before (the crawl stops in the catch block below), but with a
                        // message that says what went wrong instead of a bare null-reference error.
                        throw new InvalidOperationException("Page " + i + ": product list container not found.");
                    }

                    HtmlAgilityPack.HtmlNodeCollection nodChuDe = _nodThreads.SelectNodes(@"div");
                    if (nodChuDe == null)
                    {
                        throw new InvalidOperationException("Page " + i + ": product list container has no items.");
                    }

                    // Accumulates the output lines of the current page.
                    StringBuilder ketqua = new StringBuilder();

                    // Walk the product tiles that were selected above.
                    foreach (var n in nodChuDe)
                    {
                        // The data-* attributes all live on the tile's first inner div.
                        HtmlAgilityPack.HtmlNode item = n.SelectSingleNode("div");

                        string brand_id        = item.Attributes["data-brand-id"].Value.Trim();
                        string category_id     = item.Attributes["data-category-id"].Value.Trim();
                        string product_item_id = item.Attributes["data-product-item-id"].Value.Trim();
                        string merchant_id     = item.Attributes["data-merchant-id"].Value.Trim();
                        string urlImage        = n.SelectSingleNode("div/div[1]/span/a/img").Attributes["data-other-src"].Value.Trim();
                        string name            = HttpUtility.HtmlDecode(n.SelectSingleNode("div/div[2]/div/h4").InnerText.Trim());
                        string gia             = n.SelectSingleNode("div/div[2]/div/div/span").InnerText.Trim();

                        // Only needed by the disabled product-detail fetch below.
                        string urlSanPham = n.SelectSingleNode("div/div[1]/span/a").Attributes["href"].Value;

                        // NOTE(review): the original repeated this same urlImage test four times; the other
                        // three were probably meant to check different fields - confirm which ones.
                        if (string.IsNullOrEmpty(urlImage))
                        {
                            break;
                        }

                        //HtmlAgilityPack.HtmlDocument htmlDocSanpham = htmlWeb.Load("https://www.adayroi.com" + urlSanPham);
                        //string _htmlSP = htmlDocSanpham.DocumentNode.InnerHtml;
                        //string _ItemInFoBlock = htmlDocSanpham.DocumentNode.SelectSingleNode(@"//div[@id='product_excerpt']").InnerText; //table table-bordered
                        //string _Product_description = htmlDocSanpham.DocumentNode.SelectSingleNode(@"//div[@id='product_description']").InnerHtml;

                        ketqua.Append(brand_id).Append(" $ ")
                              .Append(category_id).Append(" $ ")
                              .Append(product_item_id).Append(" $ ")
                              .Append(merchant_id).Append(" $ ")
                              .Append(name).Append(" $ ")
                              .Append(gia).Append(" $ ")
                              .Append(urlImage).Append("\n");
                    }

                    using (FileStream fs = new FileStream(@"F:\DATA.txt", FileMode.Append, FileAccess.Write))
                        using (StreamWriter w = new StreamWriter(fs, Encoding.UTF8))
                        {
                            w.WriteLine(HttpUtility.HtmlDecode(ketqua.ToString()) + "\n");
                            Console.WriteLine("Trang " + i + " OK");
                        }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.ReadLine();
            }
        }
Ejemplo n.º 53
0
        /// <summary>
        /// Search Shelfari page for possible series info, returning the next title in the series without downloading any other pages.
        /// Along the way it stores on the current book: page count and estimated reading time, the review and
        /// reader counts (kept as "popular passages" / "popular highlights"), and the series name, position and size;
        /// it also stores the previous title and the previous/next Kindle-edition URLs on this instance.
        /// TODO: Un-yuckify all the return paths without nesting a ton of ifs
        /// </summary>
        /// <param name="searchHtmlDoc">Book's Shelfari page, pre-downloaded</param>
        /// <returns>
        /// The title of the following book, or an empty string when the page has no first-edition module,
        /// no usable standard series, or no "followed by" sentence.
        /// </returns>
        private string GetNextInSeriesTitle(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
        {
            //Added estimated reading time and page count from Shelfari, for now...
            HtmlAgilityPack.HtmlNode pageNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='WikiModule_FirstEdition']");
            if (pageNode == null)
            {
                return("");
            }
            HtmlAgilityPack.HtmlNode node1 = pageNode.SelectSingleNode(".//div/div");
            if (node1 == null)
            {
                return("");
            }

            //Parse page count and multiply by average reading time.
            //The previous pattern, ((\d+)|(\d+,\d+)), always settled for the first digit run, so a
            //count written as "1,234" was captured as "1".
            Match match1 = Regex.Match(node1.InnerText, @"Page Count: (\d+(?:,\d+)*)");

            if (match1.Success)
            {
                string pageCount = match1.Groups[1].Value;
                //Invariant culture: the page always uses ',' as the group separator, whatever the machine's locale.
                double   minutes = int.Parse(pageCount, NumberStyles.AllowThousands, CultureInfo.InvariantCulture) * 1.2890625;
                TimeSpan span    = TimeSpan.FromMinutes(minutes);
                //TimeSpan.Hours wraps at 24, so use the truncated total to keep whole days in the figure.
                int hours = (int)span.TotalHours;
                main.Log(String.Format("Typical time to read: {0} hours and {1} minutes ({2} pages)"
                                       , hours, span.Minutes, pageCount));
                curBook.pagesInBook    = pageCount;
                curBook.readingHours   = hours.ToString();
                curBook.readingMinutes = span.Minutes.ToString();
            }

            //Added highlighted passage from Shelfari, dummy info for now...
            HtmlAgilityPack.HtmlNode members = searchHtmlDoc.DocumentNode.SelectSingleNode("//ul[@class='tabs_n tn1']");
            int highlights = 0;

            if (members != null)
            {
                //The review count stands in for "popular passages"...
                Match match3 = Regex.Match(members.InnerText, @"Reviews \((\d+(?:,\d+)*)\)");
                if (match3.Success)
                {
                    curBook.popularPassages = match3.Groups[1].Value;
                }
                //...and the reader count for "popular highlights".
                match3 = Regex.Match(members.InnerText, @"Readers \((\d+(?:,\d+)*)\)");
                if (match3.Success)
                {
                    curBook.popularHighlights = match3.Groups[1].Value;
                    highlights = int.Parse(match3.Groups[1].Value, NumberStyles.AllowThousands, CultureInfo.InvariantCulture);
                }
                string textPassages = curBook.popularPassages == "1"
                    ? String.Format("{0} passage has ", curBook.popularPassages)
                    : String.Format("{0} passages have ", curBook.popularPassages);
                string textHighlights = curBook.popularHighlights == "1"
                    ? String.Format("{0} time", curBook.popularHighlights)
                    : String.Format("{0} times", curBook.popularHighlights);

                main.Log(String.Format("Popular Highlights: {0}been highlighted {1}"
                                       , textPassages, textHighlights));
            }

            //If no "highlighted passages" found from Shelfari, add to log
            if (highlights == 0)
            {
                main.Log("Popular Highlights: No highlighted passages have been found for this book");
                curBook.popularPassages   = "";
                curBook.popularHighlights = "";
            }

            //Check if book series is available and displayed in Series & Lists on Shelfari page.
            HtmlAgilityPack.HtmlNode seriesNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='WikiModule_Series']/div");
            if (seriesNode == null)
            {
                return("");
            }

            //SelectNodes returns null, not an empty collection, when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection seriesTypes = seriesNode.SelectNodes(".//div");
            if (seriesTypes == null)
            {
                return("");
            }

            //If multiple Series found, find and use standard series.
            foreach (HtmlAgilityPack.HtmlNode seriesType in seriesTypes)
            {
                string seriesText = seriesType.InnerText;
                if (!seriesText.Contains("(standard series)", StringComparison.OrdinalIgnoreCase) ||
                    seriesText.Contains("(Reading Order)", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Match position = Regex.Match(seriesText, @"This is book (\d+) of (\d+)");
                if (!position.Success)
                {
                    continue;
                }

                //The series name is the text of the entry's link; an entry without one cannot be named, so skip it.
                HtmlAgilityPack.HtmlNode seriesLink = seriesType.ChildNodes["a"];
                if (seriesLink == null)
                {
                    continue;
                }

                curBook.seriesName = seriesLink.InnerText.Trim();
                main.Log("About the series: " + seriesText.Replace(". (standard series)", ""));
                curBook.seriesPosition = position.Groups[1].Value;
                curBook.totalInSeries  = position.Groups[2].Value;

                HtmlAgilityPack.HtmlNode seriesInfo = seriesNode.SelectSingleNode(".//p");
                //NOTE(review): ChildNodes["a"] is the paragraph's first link. If the paragraph links both
                //the preceding and the following book, both URLs below come from that same first link - confirm.
                HtmlAgilityPack.HtmlNode bookLink = seriesInfo == null ? null : seriesInfo.ChildNodes["a"];

                //Parse preceding book
                if (seriesInfo != null && seriesInfo.InnerText.Contains("Preceded by ", StringComparison.OrdinalIgnoreCase))
                {
                    //"Preceded by X, followed by Y." first, then the "Preceded by X." form.
                    Match preceding = Regex.Match(seriesInfo.InnerText, @"Preceded by (.*),", RegexOptions.IgnoreCase);
                    if (!preceding.Success)
                    {
                        preceding = Regex.Match(seriesInfo.InnerText, @"Preceded by (.*)\.", RegexOptions.IgnoreCase);
                    }
                    if (preceding.Success)
                    {
                        previousTitle = preceding.Groups[1].Value;
                    }
                    main.Log("Preceded by: " + previousTitle);
                    //Grab Shelfari Kindle edition link for this book
                    if (bookLink != null)
                    {
                        previousShelfariUrl = bookLink.GetAttributeValue("href", "") +
                                              "/editions?binding=Kindle";
                    }
                }

                // Check if book is the last in the series
                if (!curBook.seriesPosition.Equals(curBook.totalInSeries) &&
                    seriesInfo != null &&
                    seriesInfo.InnerText.Contains("followed by ", StringComparison.OrdinalIgnoreCase))
                {
                    //Parse following book
                    Match following = Regex.Match(seriesInfo.InnerText, @"followed by (.*)\.", RegexOptions.IgnoreCase);
                    if (following.Success)
                    {
                        main.Log("Followed by: " + following.Groups[1].Value);
                        //Grab Shelfari Kindle edition link for this book
                        if (bookLink != null)
                        {
                            nextShelfariUrl = bookLink.GetAttributeValue("href", "") + "/editions?binding=Kindle";
                        }
                        return(following.Groups[1].Value);
                    }
                }

                //Stop after first standard series is found maybe
                //add popup (already started implementation) in
                //future to pick which standard series you
                //want to use, not sure if worthwhile though.
                //eg: http://www.shelfari.com/books/37598923
                break;
            }

            return("");
        }
        /// <summary>
        /// Produces the extracted value for one configuration section beneath <paramref name="parentNode"/>.
        /// Unwanted tags are stripped first. When the section has XPath rules, every node matched by every
        /// rule contributes one entry: a child object when the section has children, otherwise the result of
        /// its transformations (if non-null), otherwise the node's de-entitized, trimmed inner text. When the
        /// section has no XPath rules, its children are extracted directly from <paramref name="parentNode"/>.
        /// </summary>
        /// <param name="name">Not read by this method.</param>
        /// <param name="config">Section describing the rules, children and transformations to apply.</param>
        /// <param name="parentNode">Node the XPath rules are evaluated against.</param>
        /// <param name="logicalParents">Ancestor chain handed on to child extraction and transformations.</param>
        /// <returns>
        /// The array of entries when <c>ForceArray</c> is set or more than one entry was collected; otherwise
        /// an empty object for no entries, or the single entry itself.
        /// </returns>
        private object Extract(string name, ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, List <HtmlAgilityPack.HtmlNode> logicalParents)
        {
            this.RemoveUnwantedTags(config, parentNode);

            var extracted = new JArray();
            bool hasXPathRules = config.XPathRules != null && config.XPathRules.Count > 0;

            if (!hasXPathRules)
            {
                // No rules: the section is a pure container for its children.
                var wrapper = new JObject();
                this.ExtractChildren(config: config, parentNode: parentNode, container: wrapper, logicalParents: logicalParents);
                extracted.Add(wrapper);
            }
            else
            {
                foreach (var rule in config.XPathRules)
                {
                    // TODO: Add try catch Exception
                    var matches = parentNode.SelectNodes(rule);
                    if (matches == null || matches.Count == 0)
                    {
                        continue;
                    }

                    // Copy of the ancestor chain, extended with the node the rule was evaluated on.
                    var lineage = logicalParents.GetRange(0, logicalParents.Count);
                    lineage.Add(parentNode);

                    foreach (var match in matches)
                    {
                        if (config.Children != null && config.Children.Count > 0)
                        {
                            var child = new JObject();
                            this.ExtractChildren(config: config, parentNode: match, container: child, logicalParents: lineage);
                            extracted.Add(child);
                            continue;
                        }

                        if (config.Transformations != null && config.Transformations.Count > 0)
                        {
                            var transformed = this.RunTransformations(config.Transformations, match, lineage);
                            if (transformed != null)
                            {
                                extracted.Add(transformed);
                            }
                            continue;
                        }

                        if (match.InnerText != null)
                        {
                            extracted.Add(HtmlEntity.DeEntitize(match.InnerText).Trim());
                        }
                    }
                }
            }

            if (config.ForceArray)
            {
                return(extracted);
            }
            if (extracted.Count == 0)
            {
                return(new JObject());
            }
            if (extracted.Count == 1)
            {
                return(extracted.First);
            }
            return(extracted);
        }
Ejemplo n.º 55
0
        /// <summary>
        /// Handles the Enter key in the serial text box: searches Google for the entered serial, shows the
        /// right-hand result block (when there is one) in the embedded browser, takes a title and image
        /// from it, and records the scan in the list, bumping the "(n)" counter of an existing entry.
        /// The text box is cleared afterwards and the key press is suppressed.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Key event data; only <c>Keys.Enter</c> is acted upon.</param>
        private void textBox1_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode != Keys.Enter)
            {
                return;
            }

            String serial = textBox1.Text.Length > 0 ? textBox1.Text : "INVALID_SERIAL";

            try
            {
                webBrowser1.AllowNavigation = true;
                string movieTitle = "UNKNOWN";
                string imgSrc     = "UNKNOWN";

                // The serial is user input: escape it so characters such as '&', '#' or spaces cannot
                // alter the query string.
                string searchUrl = "https://www.google.com/search?q=" + Uri.EscapeDataString(serial);

                HtmlWeb web = new HtmlWeb();
                document = web.Load(searchUrl);

                HtmlAgilityPack.HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//td[@id='rhs_block']");
                richTextBox1.Text = document.DocumentNode.InnerHtml;

                // A page without the right-hand block takes the same fallback path as an empty block.
                if (bodyNode != null && bodyNode.InnerText.Length > 1)
                {
                    webBrowser1.DocumentText = bodyNode.InnerHtml;

                    // SelectNodes returns null when the block holds no images.
                    HtmlAgilityPack.HtmlNodeCollection images = bodyNode.SelectNodes(".//img");
                    if (images != null)
                    {
                        // Use the first image that actually carries a src attribute.
                        foreach (var image in images)
                        {
                            var src = image.GetAttributeValue("src", null);

                            if (src != null)
                            {
                                imgSrc = src;
                                break;
                            }
                        }
                    }

                    textBox2.Enabled = false;
                    normalQuery      = true;

                    HtmlAgilityPack.HtmlNode titleNode = bodyNode.SelectSingleNode("//div[@class='_B5d']");
                    if (titleNode != null)
                    {
                        movieTitle = titleNode.InnerText;
                    }
                    else
                    {
                        // No title found: re-enable the second text box.
                        textBox2.Enabled = true;
                    }
                }
                else
                {
                    normalQuery      = false;
                    textBox2.Enabled = true;
                    webBrowser1.Navigate(searchUrl);
                }

                // Record the scan for BOTH outcomes. In the original the closing brace of the else branch
                // had been swallowed by a trailing comment, so this bookkeeping ran only in the fallback
                // branch and a successful lookup's title and image were never stored.

                // NOTE(review): the match is a substring test, so serial "12" also matches an entry for
                // "123" - confirm whether an exact match on the serial is intended.
                int existing = -1;
                for (int i = 0; i < list.Count; ++i)
                {
                    if (list[i][0].ToString().Contains(serial))
                    {
                        existing = i;
                        break;
                    }
                }

                if (existing < 0)
                {
                    // First scan of this serial.
                    list.Add(new String[] { serial + " (1)", movieTitle, imgSrc });
                    listView1.Items.Add(serial + " (1)");
                    listBox2.Items.Add(movieTitle);
                }
                else
                {
                    // Entries look like "<serial> (<count>)": read the number between the first '(' and
                    // the final character, increment it and write the label back.
                    string entry = list[existing][0].ToString();
                    int    open  = entry.IndexOf('(');
                    int    count = Int32.Parse(entry.Substring(open + 1, entry.Length - open - 2));
                    list[existing][0] = entry.Substring(0, open) + "(" + (count + 1) + ")";

                    // Rebuild both views from the list so they show the new counter.
                    listView1.Items.Clear();
                    listBox2.Items.Clear();
                    foreach (String[] j in list)
                    {
                        listView1.Items.Add(j[0]);
                        listBox2.Items.Add(j[1]);
                    }
                }
            }
            catch (Exception)
            {
                // NOTE(review): this text is overwritten by the reset just below, so it is never visible.
                textBox1.Text = "INVALID SERIAL";
            }

            textBox1.Text      = "";
            e.SuppressKeyPress = true;
        }
Ejemplo n.º 56
0
        /// <summary>
        /// Parses the "#content" element of a wiki page response into <c>patternObject</c>: sets the
        /// title, removes the table of contents and all HTML comments, moves the heading into the content
        /// node, collects category names, pattern links and relation types from the page's links, and
        /// finally writes the object as JSON to the file named by <c>Pattern.GetFileName</c>.
        /// </summary>
        /// <param name="response">Crawler response for the page.</param>
        public override void Parse(Response response)
        {
            //Create a new HTMLAglityPack document
            HtmlDocument ContentDocument = new HtmlDocument();

            //load the #content of the page into the document
            ContentDocument.LoadHtml(response.Css("#content").First().OuterHtml);
            HtmlAgilityPack.HtmlNode BodyNode = ContentDocument.DocumentNode;

            //every XPath below starts with "//", so each lookup searches the whole document
            HtmlAgilityPack.HtmlNode HeadingNode = BodyNode.SelectSingleNode("//*[@id=\"firstHeading\"]");
            HtmlAgilityPack.HtmlNode ContentNode = BodyNode.SelectSingleNode("//*[@id=\"mw-content-text\"]");

            //remove the "toc" section to save space and later client-side processing time
            HtmlAgilityPack.HtmlNode TocNode = ContentNode.SelectSingleNode("//*[@id=\"toc\"]");
            if (TocNode != null)
            {
                TocNode.Remove();
            }

            //SelectNodes returns null (not an empty collection) when nothing matches,
            //so a page without comments must not reach the foreach
            HtmlAgilityPack.HtmlNodeCollection CommentNodes = ContentNode.SelectNodes("//comment()");
            if (CommentNodes != null)
            {
                foreach (var node in CommentNodes)
                {
                    node.Remove();
                }
            }

            //set the patternObject's title (read after the comments are gone, as the final value was before)
            patternObject.Title = HeadingNode.InnerHtml;

            ContentNode.PrependChild(HeadingNode);

            HtmlAgilityPack.HtmlNodeCollection Links = ContentNode.SelectNodes("//a/@href");
            if (Links != null)
            {
                foreach (var link in Links)
                {
                    string Href = link.Attributes["href"].Value;

                    //skip if this is a redlink (page doesn't exist).
                    if (Href.Contains("redlink=1"))
                    {
                        continue;
                    }
                    //skip if this links to this page
                    if (Href.Split('#').First() == response.FinalUrl)
                    {
                        continue;
                    }

                    //if any of the links ancestor nodes is the "category links" part of the page
                    if (link.Ancestors().Any(node => node.Id == "catlinks"))
                    {
                        if (link.InnerText != "Categories") //if it is not the "categories" special page
                        {
                            //add it to the patterns list of categories
                            patternObject.Categories.Add(link.InnerText);
                        }
                    }
                    else //assume its a normal text-body link
                    {
                        //check if we don't already know about this link
                        patternObject.CreateOrGetPatternLink(link.InnerText);
                    }

                    //add relation info if this is a relation link
                    HtmlAgilityPack.HtmlNode SectionHeadingNode = GetNodeReleventPageHeading(link, "h2");
                    if (SectionHeadingNode != null && SectionHeadingNode.InnerText == "Relations")
                    {
                        //get the relation type of this relation and get its inner text
                        //NOTE(review): a link under "Relations" with no h3 heading still fails here with a
                        //null reference - decide whether such links should be skipped
                        HtmlAgilityPack.HtmlNode RelationHeadingNode = GetNodeReleventPageHeading(link, "h3");
                        String RelationName = RelationHeadingNode.InnerText;

                        //if there is a h4 heading that starts after that h3 heading
                        HtmlAgilityPack.HtmlNode SubHeadingNode = GetNodeReleventPageHeading(link, "h4");
                        if (SubHeadingNode != null &&
                            RelationHeadingNode.InnerStartIndex < SubHeadingNode.InnerStartIndex)
                        {
                            //assume it is a "with x" sub-category of relation for the "Can Instantiate" section
                            RelationName = RelationHeadingNode.InnerText + " " + SubHeadingNode.InnerText;
                        }

                        //add the relevent relation to this link
                        patternObject.CreateOrGetPatternLink(link.InnerText).Type.Add(RelationName);
                    }
                }
            }

            //get a cleaned copy of the #content HTML for giving in the JSON data
            patternObject.Content = ProcessPageContentToString(ContentNode);

            string Json = JsonConvert.SerializeObject(patternObject);

            File.WriteAllText(Pattern.GetFileName(patternObject.Title), Json);
        }
 /// <summary>
 /// Runs <paramref name="xpath"/> against <paramref name="node"/> and returns the matches as a list.
 /// </summary>
 /// <param name="node">The node on which <c>SelectNodes</c> is called.</param>
 /// <param name="xpath">The XPath expression passed to <c>SelectNodes</c>.</param>
 /// <returns>
 /// A list copy of the matched nodes, or a new empty list when <c>SelectNodes</c> returns null.
 /// </returns>
 public static IList <HtmlNode> SelectNodesAsList(
     this HtmlNode node,
     string xpath)
 {
     var matches = node.SelectNodes(xpath);

     // SelectNodes may hand back null; callers always get a list they can enumerate.
     if (matches == null)
     {
         return new List <HtmlNode>(0);
     }

     return matches.ToList();
 }
Ejemplo n.º 58
0
        /// <summary>
        /// Loads the HTML file at <paramref name="file"/> and builds one <c>Entry</c> per
        /// <c>h2</c> found under <c>following-sibling::div[1]/ul/li</c> of each <c>h3</c> node.
        /// </summary>
        /// <param name="file">Path of the HTML file to parse.</param>
        /// <returns>
        /// The entries that were extracted. The list is empty when the file does not exist,
        /// when the document reported parse errors, when there is no <c>body</c> node, or when
        /// no <c>h3</c> node was found.
        /// </returns>
        public List <Entry> parseFile(string file)
        {
            List <Entry> result = new List <Entry>();

            if (!File.Exists(file))
            {
                log.Error(String.Format("File \"{0}\" does not exist.", file));
                return result;
            }

            log.InfoFormat("Parsing file : {0}", file);

            HtmlDocument htmlDoc = new HtmlDocument();

            htmlDoc.Load(file);

            if (htmlDoc.DocumentNode == null)
            {
                return result;
            }

            HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");

            // Log every parse error and remember whether there was at least one, so the
            // error sequence is enumerated only once.
            bool hasParseErrors = false;

            foreach (HtmlParseError error in htmlDoc.ParseErrors)
            {
                hasParseErrors = true;
                log.Error(String.Format("Error \nCode: {0}\nLine: {1}\nPosition : {2}\nReason : {3}\nSource Text : {4} ", error.Code, error.Line, error.LinePosition, error.Reason, error.SourceText));
            }

            if (hasParseErrors || bodyNode == null)
            {
                return result;
            }

            int year = extractYear(bodyNode);

            HtmlNodeCollection monthsNodeCollection = bodyNode.SelectNodes("//h3");

            // SelectNodes yields null (not an empty collection) when nothing matches;
            // iterating it directly would throw a NullReferenceException.
            if (monthsNodeCollection == null)
            {
                log.Error(String.Format("Could not find any month headings in file \"{0}\".", file));
                return result;
            }

            foreach (HtmlNode monthNode in monthsNodeCollection)
            {
                Month month = extractMonth(monthNode);

                HtmlNodeCollection entryNodeCollection = monthNode.SelectNodes("following-sibling::div[1]/ul/li/h2");

                // A month without entries is reported and skipped.
                if (entryNodeCollection == null)
                {
                    log.Error(String.Format("Could not find any entries for month. Html: {0}", monthNode.InnerHtml));
                    continue;
                }

                foreach (HtmlNode entryNode in entryNodeCollection)
                {
                    DateTime entryDate  = extractEntryDate(entryNode);
                    String   entryTitle = extractEntryTitle(entryNode);

                    // Year and month come from the page/heading; only the day is taken
                    // from the entry's own date.
                    DateStamp entryKey = new DateStamp()
                    {
                        Year  = year,
                        Month = month,
                        Day   = entryDate.Day
                    };

                    String entryText = extractEntryText(entryNode);

                    List <Image> entryImages = extractEntryImages(entryNode);

                    result.Add(
                        new Entry()
                    {
                        Key    = entryKey,
                        Title  = entryTitle,
                        Text   = entryText,
                        Images = entryImages
                    });
                }
            }

            return result;
        }
        // Layout of the timestamp read from the "title" attribute of the gall_date cell.
        private const string GallDateFormat = "yyyy-MM-dd HH:mm:ss";

        /// <summary>
        /// Downloads the gallery list pages starting at <c>initPage</c>, accumulates per-user
        /// totals (post count, replies, views, recommendations) for posts dated between
        /// <c>initDate</c> and <c>endDate</c>, then fills <c>userList</c> sorted by post count,
        /// raises <c>CrawlingEnded</c> and calls <c>SaveResult</c>.
        /// </summary>
        /// <remarks>
        /// The crawl stops when <c>endPage</c> is reached or when the last row of a page is
        /// older than <c>initDate</c>. A page that cannot be downloaded or parsed is reported
        /// through <c>ErrorOccured</c> and skipped; the <c>endPage</c> bound also applies to
        /// skipped pages so a run of failing pages cannot loop forever.
        /// </remarks>
        public void Crawler()
        {
            int previousPageGallNum = 1000000000;

            Console.WriteLine(initDate.ToString() + endDate.ToString());

            string url = gallUrl + "&page=";

            //Dictionary value => count, replyNum, gallCount, gallRecommend
            Dictionary <UserInfo, int[]> userDic = new Dictionary <UserInfo, int[]>();

            int currentPage = this.initPage;

            // WebClient is IDisposable; release it once crawling is finished.
            using (var client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;

                while (true)
                {
                    string text = DownloadPageWithRetry(client, url + currentPage.ToString());

                    hap.HtmlNodeCollection nicks = null;
                    bool pageFailed = string.IsNullOrEmpty(text);

                    if (!pageFailed)
                    {
                        hap.HtmlDocument textHap = new hap.HtmlDocument();
                        textHap.LoadHtml(text);

                        nicks = textHap.DocumentNode.SelectNodes("//tr[@class='ub-content us-post']");

                        if (nicks == null)
                        {
                            // No post rows at all: treat like any other unparsable page.
                            pageFailed = true;
                        }
                        else
                        {
                            try
                            {
                                foreach (hap.HtmlNode nick in nicks)
                                {
                                    ProcessPostRow(nick, previousPageGallNum, userDic);
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: rows handled before the failure stay counted.
                                pageFailed = true;
                            }
                        }
                    }

                    if (pageFailed)
                    {
                        if (ErrorOccured != null)
                        {
                            ErrorOccured(text ?? string.Empty, null);
                        }
                        // Honour the page limit on the failure path as well.
                        if (currentPage >= endPage)
                        {
                            break;
                        }
                        currentPage++;
                        continue;
                    }

                    hap.HtmlNode lastRow = nicks[nicks.Count - 1];

                    previousPageGallNum = GetOnlyInt(lastRow.SelectSingleNode("./td[@class='gall_num']").InnerText);
                    DateTime currentDate = DateTime.ParseExact(
                        lastRow.SelectSingleNode("./td[@class='gall_date']").Attributes["title"].Value,
                        GallDateFormat,
                        System.Globalization.CultureInfo.InvariantCulture);

                    if (currentPage >= endPage || DateTime.Compare(currentDate, initDate) < 0)
                    {
                        break;
                    }

                    System.Collections.ArrayList arr = new System.Collections.ArrayList();
                    string str = currentPage.ToString() + " 페이지, 날짜: " + currentDate.ToString();
                    arr.Add(str); arr.Add(currentDate); arr.Add(currentPage - initPage);
                    if (newPageHappened != null)
                    {
                        newPageHappened(arr, null);
                    }
                    currentPage++;
                }
            }

            //Dictionary value => count, replyNum, gallCount, gallRecommend
            foreach (KeyValuePair <UserInfo, int[]> user in userDic)
            {
                UserInfo tempUser = user.Key;
                tempUser.count         = user.Value[0];
                tempUser.replyNum      = user.Value[1];
                tempUser.gallCount     = user.Value[2];
                tempUser.gallRecommend = user.Value[3];
                UserRank tempUserRank = new UserRank(tempUser, user.Value[0], user.Value[1], user.Value[2], user.Value[3]);
                userList.Add(tempUserRank);
            }
            var sorted = from userRank in userList
                         orderby userRank.count descending
                         select userRank;

            userList = sorted.ToList <UserRank>();
            if (CrawlingEnded != null)
            {
                CrawlingEnded(userList, null);
            }
            string tempDataDir = Directory.GetCurrentDirectory() + "\\temp-data\\";

            Directory.CreateDirectory(tempDataDir);
            string filename = tempDataDir + gallId + DateTime.Now.ToString("_yyyy-MM-dd_HH-mm-ss");

            SaveResult(filename);
        }

        // Downloads one page, retrying a bounded number of times with a short pause;
        // returns null when every attempt failed or produced an empty body.
        private static string DownloadPageWithRetry(WebClient client, string address)
        {
            const int maxAttempts = 3;
            const int retryDelayMilliseconds = 1000;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    string text = client.DownloadString(address);
                    if (!string.IsNullOrEmpty(text))
                    {
                        return text;
                    }
                }
                catch (Exception)
                {
                    // Any download failure is treated as retryable; fall through.
                }

                if (attempt < maxAttempts)
                {
                    System.Threading.Thread.Sleep(retryDelayMilliseconds);
                }
            }

            return null;
        }

        // Reads one post row and, when its number and date pass the filters, adds its
        // figures to the per-user totals in userDic.
        private void ProcessPostRow(hap.HtmlNode nick, int previousPageGallNum, Dictionary <UserInfo, int[]> userDic)
        {
            int      replyNum;
            string   subject;

            int gallNum = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_num']").InnerText);
            DateTime gallDate = DateTime.ParseExact(
                nick.SelectSingleNode("./td[@class='gall_date']").Attributes["title"].Value,
                GallDateFormat,
                System.Globalization.CultureInfo.InvariantCulture);

            Console.WriteLine(gallNum.ToString() + " " + gallDate.ToString());

            // Skip rows whose number is not below the last number of the previous page.
            if (gallNum >= previousPageGallNum)
            {
                Console.WriteLine(previousPageGallNum.ToString() + " " + gallNum.ToString());
                Console.WriteLine("번호 에러");
                return;
            }

            // Skip rows outside the [initDate, endDate] window.
            if (DateTime.Compare(gallDate, initDate) < 0 || DateTime.Compare(gallDate, endDate) > 0)
            {
                Console.WriteLine("날짜 에러");
                return;
            }

            hap.HtmlNode user         = nick.SelectSingleNode("./td[@class='gall_writer ub-writer']");
            UserInfo     tempUserInfo = new UserInfo(user.Attributes["data-nick"].Value);

            // An empty data-uid selects the data-ip based nick; otherwise the uid is used.
            if (user.Attributes["data-uid"].Value == "")
            {
                tempUserInfo.setFluidNick(user.Attributes["data-ip"].Value);
            }
            else
            {
                tempUserInfo.setFixedNick(user.Attributes["data-uid"].Value);
            }

            //replyNum and subject are in <td class='gall_tit ub-word'></td>
            hap.HtmlNode subjectNode = nick.SelectSingleNode("./td[2]");
            try
            {
                // When the second cell is the gall_subject column, the title is one cell further.
                if (subjectNode.Attributes["class"].Value == "gall_subject")
                {
                    subjectNode = nick.SelectSingleNode("./td[3]");
                }
                subject = subjectNode.SelectSingleNode("./a[1]").InnerText;
                if (subjectNode.SelectNodes("./a").Count == 2)
                {
                    replyNum = GetOnlyInt(subjectNode.SelectSingleNode("./a[@class='reply_numbox']/span").InnerText);
                }
                else
                {
                    replyNum = 0;
                }
            }
            catch
            {
                // Deliberate fallback: a row without a readable title still counts.
                subject  = "NullSubjectException";
                replyNum = 0;
            }

            int gallCount     = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_count']").InnerText);
            int gallRecommend = GetOnlyInt(nick.SelectSingleNode("./td[@class='gall_recommend']").InnerText);

            //Dictionary value => count, replyNum, gallCount, gallRecommend
            int[] totals;
            if (userDic.TryGetValue(tempUserInfo, out totals))
            {
                totals[0] += 1;
                totals[1] += replyNum;
                totals[2] += gallCount;
                totals[3] += gallRecommend;
            }
            else
            {
                userDic.Add(tempUserInfo, new int[] { 1, replyNum, gallCount, gallRecommend });
            }

            UserData tempUserData = new UserData(tempUserInfo);
            tempUserData.DataInput(gallNum, replyNum, gallCount, gallRecommend, gallDate, subject);
        }
Ejemplo n.º 60
0
 /// <summary>
 /// Evaluates <c>_LabelData.XPath</c> against the node returned by <c>GetTempParentNode()</c>.
 /// </summary>
 /// <returns>
 /// Whatever <c>SelectNodes</c> returns for that expression; callers should be prepared
 /// for a null result when nothing matches.
 /// </returns>
 protected HtmlAgilityPack.HtmlNodeCollection GetNodeCollection()
 {
     HtmlAgilityPack.HtmlNode parent = GetTempParentNode();
     HtmlAgilityPack.HtmlNodeCollection matches = parent.SelectNodes(_LabelData.XPath);

     return matches;
 }