/// <summary>
/// Loads the embedded HTML template into <c>document</c> and wraps the element whose id is
/// <c>LogEventTableId</c> in a new node collection stored in <c>table</c>.
/// </summary>
private void LoadHtmlTemplate()
        {
            this.document = new HtmlDocument();

            // Release the manifest resource stream as soon as the template has been parsed.
            using (var templateStream = Assembly.GetExecutingAssembly().GetManifestResourceStream(HtmlTemplate))
            {
                this.document.Load(templateStream);
            }

            this.table = new HtmlNodeCollection(this.document.GetElementbyId(LogEventTableId));
        }
示例#2
1
        /// <summary>
        /// Returns the nodes that satisfy at least one of the supplied filters.
        /// </summary>
        /// <param name="nodes">Candidate nodes; may be <c>null</c>.</param>
        /// <param name="filters">Filters each node is tested against; may be <c>null</c>.</param>
        /// <returns>
        /// The matching nodes in input order, each at most once. Empty when either argument is <c>null</c>.
        /// </returns>
        public static List<HtmlNode> FilterNodes(HtmlNodeCollection nodes, params IFilter[] filters)
        {
            var targets = new List<HtmlNode>();

            if (nodes == null || filters == null)
            {
                return targets;
            }

            foreach (var node in nodes)
            {
                // Any() stops at the first matching filter, so a node that satisfies several
                // filters is added once rather than once per filter.
                if (filters.Any(filter => filter.MatchesFilter(node)))
                {
                    targets.Add(node);
                }
            }

            return targets;
        }
示例#3
0
 /// <summary>
 /// Scans the given nodes for inner HTML that looks like company information and, for each hit,
 /// appends the node's href followed by its truncated inner HTML to <c>listBox1</c>.
 /// </summary>
 /// <param name="nodes">Nodes to inspect; a <c>null</c> collection is ignored.</param>
 private void getCompanyInfor(HtmlAgilityPack.HtmlNodeCollection nodes)
 {
     if (nodes == null)
     {
         return;
     }

     // Markers that flag a node as company information
     // (company-name kanji, "@", capital, telephone, limited company).
     string[] markers = { "株", "@", "資本金", "電話", "有限" };
     // The text is cut at the first occurrence of each of these, applied in this order.
     string[] cutMarks = { "の", "&" };

     foreach (HtmlAgilityPack.HtmlNode node in nodes)
     {
         string html = node.InnerHtml;

         bool looksLikeCompany = false;
         foreach (string marker in markers)
         {
             // Ordinal search: these are literal markers, not linguistic text.
             if (html.IndexOf(marker, StringComparison.Ordinal) > -1)
             {
                 looksLikeCompany = true;
                 break;
             }
         }

         if (!looksLikeCompany)
         {
             continue;
         }

         listBox1.Items.Add(node.GetAttributeValue("href", ""));

         string temp = html;
         foreach (string cutMark in cutMarks)
         {
             int cutAt = temp.IndexOf(cutMark, StringComparison.Ordinal);
             if (cutAt > -1)
             {
                 temp = temp.Substring(0, cutAt);
             }
         }

         listBox1.Items.Add(temp);
         // Keeps the UI responsive while the list is being filled.
         Application.DoEvents();
     }
 }
示例#4
0
    /// <summary>
    /// Adds the menu entries found in the first table cell below <paramref name="node"/> to <c>menu.menu</c>.
    /// </summary>
    /// <remarks>
    /// When the cell contains more than two br elements, every literal br tag in its inner HTML is
    /// rewritten into a closing-div/opening-div pair and the text of every resulting div is added.
    /// Otherwise the text of the first divs is added, one per br element found.
    /// Nothing is added when the node has no td, or the first td has no div.
    /// </remarks>
    /// <param name="menu">Receiver of the extracted entries.</param>
    /// <param name="node">Node whose descendant td elements are inspected.</param>
    public static void Classification(Menu menu, agi.HtmlNode node)
    {
        // SelectNodes returns null when nothing matches, so each lookup is checked before use.
        agi.HtmlNodeCollection divide_td = node.SelectNodes(".//td");
        if (divide_td == null)
        {
            return;
        }

        agi.HtmlNode firstCell = divide_td[0];

        agi.HtmlNodeCollection check_div = firstCell.SelectNodes(".//div");
        if (check_div == null)
        {
            return;
        }

        agi.HtmlNodeCollection check_br = firstCell.SelectNodes(".//br");
        int count = check_br == null ? 0 : check_br.Count;

        if (count > 2)
        {
            firstCell.InnerHtml = firstCell.InnerHtml.Replace("<br>", "</div><div>");

            agi.HtmlNodeCollection tmp = firstCell.SelectNodes(".//div");
            if (tmp == null)
            {
                return;
            }

            foreach (agi.HtmlNode div in tmp)
            {
                menu.menu.Add(div.InnerText);
            }
        }
        else
        {
            // The loop is driven by the br count; clamp it so it never runs past the divs.
            int limit = Math.Min(count, check_div.Count);
            for (int i = 0; i < limit; i++)
            {
                menu.menu.Add(check_div[i].InnerText);
            }
        }
    }
示例#5
0
        /// <summary>
        /// Collects every hyperlink on the page, resolves each against <c>thisUrl</c>, and stores
        /// the result in the <c>urlLists</c> member.
        /// </summary>
        /// <remarks>
        /// When the page has no matching nodes, <c>urlLists</c> is left untouched.
        /// Links that cannot be turned into a <see cref="Uri"/> are skipped.
        /// </remarks>
        public void getUrls()
        {
            HtmlAgilityPack.HtmlNodeCollection anchors = webPage.DocumentNode.SelectNodes("//a/@href");
            if (anchors == null)
            {
                return;
            }

            List <Uri> resolved = new List <Uri>();

            foreach (HtmlNode anchor in anchors)
            {
                string href = anchor.GetAttributeValue("href", null);
                if (href == null)
                {
                    continue;
                }

                try
                {
                    resolved.Add(new Uri(thisUrl, href));
                }
                catch (System.UriFormatException)
                {
                    // Malformed link: ignore it and move on to the next node.
                }
            }

            this.urlLists = resolved.ToArray();
        }
		/// <summary>
		/// Builds a <c>Directory</c> from a heading node and the nodes listed beneath it.
		/// </summary>
		/// <remarks>
		/// A "div" child is treated as a nested directory: its first child is the heading and the
		/// children of its second child are the contents. An "a" child becomes a bookmark.
		/// Children with any other name are ignored.
		/// </remarks>
		/// <param name="directoryNode">Node whose inner text becomes the directory name.</param>
		/// <param name="childNodes">Nodes contained in the directory.</param>
		/// <returns>The directory with its nested directories and bookmarks filled in.</returns>
		protected Directory ReadNode( HtmlNode directoryNode , HtmlNodeCollection childNodes ) {
			var result = new Directory();
			result.Id = Guid.NewGuid().ToString();
			result.Name = directoryNode.InnerText;
			result.LastUpdate = RetrieveToDateTime( directoryNode , "" );

			foreach ( var child in childNodes ) {
				switch ( child.Name ) {
					case "div": {
						var nested = ReadNode( child.ChildNodes[0] , child.ChildNodes[1].ChildNodes );
						result.Directories.Add( nested );
						break;
					}
					case "a": {
						var bookmark = new Bookmark {
							Description = child.InnerText ,
							Created = RetrieveToDateTime( child , "Add_date" ) ,
							Uri = child.GetAttributeValue( "href" , string.Empty ) ,
							Icon = child.GetAttributeValue( "icon" , string.Empty ) ,
							LastUpdate = RetrieveToDateTime( child , "last_modified" ) ,
							IconUrl = child.GetAttributeValue( "icon_uri" , string.Empty )
						};
						result.Bookmarks.Add( bookmark );
						break;
					}
				}
			}

			return result;
		}
示例#7
0
        /// <summary>
        /// Appends an indented outline of the given nodes, and recursively of their children,
        /// to <paramref name="returnStr"/>.
        /// </summary>
        /// <remarks>
        /// Each node produces one line of the form "label: tag(Level depth)", indented by four
        /// spaces per level. The label is the node's ID attribute, otherwise its name attribute,
        /// otherwise the text "&lt;no name&gt;".
        /// </remarks>
        /// <param name="elemColl">Nodes to print at this level.</param>
        /// <param name="returnStr">Buffer that receives the outline.</param>
        /// <param name="depth">Nesting level of <paramref name="elemColl"/>.</param>
        /// <returns>The full content of <paramref name="returnStr"/> after this level was appended.</returns>
        private string PrintDom(HtmlAgilityPack.HtmlNodeCollection elemColl, System.Text.StringBuilder returnStr, Int32 depth)
        {
            foreach (HtmlNode elem in elemColl)
            {
                // A null default (rather than the literal text "null") lets a missing attribute
                // fall through to the next candidate instead of being printed as a label.
                string elemName = elem.GetAttributeValue("ID", null);
                if (string.IsNullOrEmpty(elemName))
                {
                    elemName = elem.GetAttributeValue("name", null);
                    if (string.IsNullOrEmpty(elemName))
                    {
                        elemName = "<no name>";
                    }
                }

                returnStr.Append(' ', depth * 4);
                returnStr.AppendLine(elemName + ": " + elem.Name + "(Level " + depth + ")");

                if (elem.HasChildNodes)
                {
                    PrintDom(elem.ChildNodes, returnStr, depth + 1);
                }
            }

            return(returnStr.ToString());
        }
示例#8
0
        /// <summary>
        /// Runs the rule's XPath against <paramref name="node"/> and, for every matched node,
        /// runs the rule's single-node XPath and appends its result to <c>res</c>.
        /// </summary>
        /// <param name="node">Root node for the query; <c>null</c> yields the current <c>res</c> unchanged.</param>
        /// <returns>The accumulated <c>res</c> list.</returns>
        public List<List<KeyValuePair<string, object>>> run(HtmlNode node)
        {
            Factory.Instance.iInfo(string.Format("Running xpathCollection id : {0}", rule.id));

            // The null check must precede the first dereference of node.
            if (node == null)
            {
                return res;
            }

            HtmlNodeCollection nodes = new HtmlNodeCollection(node);
            HtmlNodeCollection n2 = node.SelectNodes(rule.xpath);
            if (n2 != null)
            {
                foreach (HtmlNode n in n2)
                {
                    nodes.Add(n);
                }
            }

            //run
            foreach (HtmlNode n in nodes)
            {
                if (rule.xpathSingle == null)
                {
                    continue;
                }

                // No previous value is threaded between matches; each run starts from null.
                List<KeyValuePair<string, object>> last_val = null;
                XPathSingle xs = new XPathSingle(rule.xpathSingle, last_val);
                last_val = (List<KeyValuePair<string, object>>)xs.Run(n);
                res.Add(last_val);
            }

            return res;
        }
示例#9
0
 /// <summary>
 /// Converts result rows into items, keeping at most the first four rows that parse successfully.
 /// </summary>
 /// <param name="htmlNodeCollection">Row nodes; each tr element holds one item.</param>
 /// <param name="isOutBound">
 /// <c>true</c> to use the input's departure time for the segments, <c>false</c> to use its return time.
 /// </param>
 /// <returns>The parsed items; empty when the collection is <c>null</c> or nothing could be parsed.</returns>
 private List<ItemTemp> getListItems(HtmlNodeCollection htmlNodeCollection,bool isOutBound)
 {
     List<ItemTemp> liItems = new List<ItemTemp>();
     if (htmlNodeCollection == null)
     {
         return liItems;
     }

     try
     {
         int id = 0;
         foreach (HtmlNode node in htmlNodeCollection)
         {
             try
             {
                 ItemTemp item = new ItemTemp();
                 // Prices come from the second cell.
                 HtmlNode nodeTemp = node.SelectSingleNode("td[2]");
                 item.prices = getListprices(nodeTemp);
                 // Segments come from the first cell.
                 nodeTemp = node.SelectSingleNode("td[1]");
                 DateTime d = isOutBound ? _input.DepartTime : _input.ReturnTime;
                 item.Segments = getListSegments(d, nodeTemp);
                 item.Id = id;
                 liItems.Add(item);
                 // Only the first four items are kept.
                 if (liItems.Count > 3)
                 {
                     break;
                 }
                 id++;
             }
             catch (System.Exception rowError)
             {
                 // Best effort: a row that fails to parse is skipped, but the failure is traced.
                 System.Diagnostics.Debug.WriteLine("getListItems: skipped a row: " + rowError);
             }
         }
     }
     catch (System.Exception listError)
     {
         // Whatever was collected so far is still returned.
         System.Diagnostics.Debug.WriteLine("getListItems: stopped early: " + listError);
     }

     return liItems;
 }
示例#10
0
 /// <summary>
 /// Builds an <c>Idol</c> from one table row, reading each field from its column and
 /// normalising known misspellings in the centre-effect and skill descriptions.
 /// </summary>
 /// <param name="td">Cells of the row.</param>
 /// <returns>The parsed idol, or <c>null</c> when any cell is missing or cannot be converted.</returns>
 private Idol ParseIdolData(HtmlNodeCollection td)
 {
     try
     {
         var label = ExtractLabel(td[LabelColumn].InnerText.Trim());
         var name = td[NameColumn].InnerText.Trim();
         var rarity = td[RarityColumn].InnerText.Trim().ToRarity();
         var category = td[CategoryColumn].InnerText.Trim().ToIdolCategory();

         // Dance, vocal and visual carry thousands separators; life does not get that treatment.
         var life = Convert.ToInt32(td[LifeColumn].InnerText.Trim());
         var dance = Convert.ToInt32(td[DanceColumn].InnerText.Trim().Replace(",", ""));
         var vocal = Convert.ToInt32(td[VocalColumn].InnerText.Trim().Replace(",", ""));
         var visual = Convert.ToInt32(td[VisualColumn].InnerText.Trim().Replace(",", ""));

         var implementedOn = DateTime.Parse(td[ImplementationDateColumn].InnerText.Trim());

         var centerEffectDetails = td[CenterEffectDetailsColumn].InnerText.Trim()
             .Replace("パッショナイドル", "パッションアイドル");
         var centerEffect = CenterEffect.Create(td[CenterEffectColumn].InnerText.Trim(), centerEffectDetails);

         var skillDetails = td[SkillDetailsColumn].InnerText.Trim()
             .Replace("PEFECT", "PERFECT")
             .Replace("PERFCT", "PERFECT")
             .Replace("秒毎", "秒ごと")
             .Replace("秒間", "秒ごと")
             .Replace("しばらく間", "しばらくの間");
         var skill = Skill.Create(td[SkillColumn].InnerText.Trim(), skillDetails);

         return new Idol(label, name, rarity, category, life, dance, vocal, visual,
             implementedOn, centerEffect, skill);
     }
     catch (Exception)
     {
         // Any malformed row is reported to the caller as null.
         return null;
     }
 }
示例#11
0
        /// <summary>
        /// Downloads http://www.ugtop.com/spill.shtml and returns the inner text of the second
        /// font element on that page.
        /// </summary>
        /// <returns>
        /// The text of the second font element; an empty string when the page has only one, or
        /// when the download or parsing failed (in which case a warning dialog is shown first).
        /// </returns>
        public static string getGIPAddr()
        {
            string t_url = "http://www.ugtop.com/spill.shtml";
            string val   = "";

            try
            {
                string html;
                // WebClient holds network resources; release them once the page is downloaded.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    html = wc.DownloadString(t_url);
                }

                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                HtmlAgilityPack.HtmlNodeCollection fns = doc.DocumentNode.SelectNodes(@"//font");
                if (fns == null)
                {
                    // Reported through the same dialog as any other failure.
                    throw new InvalidOperationException("No font elements found in " + t_url);
                }

                if (fns.Count >= 2)
                {
                    val = fns[1].InnerText;
                }
            } catch (Exception ex)
            {
                MessageBox.Show("システムエラーです。何度か実行してもこのエラーが出る場合は、チームリーダーに報告してください。\r\n" + ex.ToString(), "警告");
            }

            return(val);
        }
        /// <summary>
        /// Collects the non-empty src values of the img descendants of every node in the collection.
        /// </summary>
        /// <param name="nodeCollection">Nodes whose img descendants are inspected.</param>
        /// <param name="nodeCollectionName">Name written to the log (generally the target node name).</param>
        /// <param name="expectedSize">
        /// Number of images expected per node. When greater than one, all images of a node are
        /// returned; otherwise only the first one is.
        /// </param>
        /// <returns>The collected image URLs. A node without any image contributes nothing.</returns>
        public List<string> GetImageUrls(HtmlNodeCollection nodeCollection, string nodeCollectionName, int expectedSize)
        {
            List<string> imgUrlsList = new List<string>();

            foreach (var node in nodeCollection)
            {
                // Materialised once: the result is counted and then read again below.
                List<string> imgUrls = node.Descendants("img")
                                           .Select(e => e.GetAttributeValue("src", null))
                                           .Where(s => !String.IsNullOrEmpty(s))
                                           .ToList();

                // Log a warning when the number found differs from the expectation.
                if (imgUrls.Count != expectedSize)
                {
                    LogHandler.createWarningLog(nodeCollectionName, expectedSize, imgUrls.Count);
                }

                if (expectedSize > 1)
                {
                    // More than one expected: keep every URL of this node.
                    imgUrlsList.AddRange(imgUrls);
                }
                else if (imgUrls.Count > 0)
                {
                    // One expected: keep only the first. The mismatch was already logged above,
                    // so an empty result is skipped instead of throwing.
                    imgUrlsList.Add(imgUrls[0]);
                }
            }

            return imgUrlsList;
        }
示例#13
0
        /// <summary>
        /// Rebuilds the list of city map URLs from the city index page and saves the configuration.
        /// </summary>
        /// <remarks>
        /// The existing list is cleared first. On any failure a message box is shown and the
        /// configuration is not saved.
        /// </remarks>
        public void UpdataCityURL()
        {
            List <CityModel> cityModels = CityURLConfig.GetInstance().cityURLConfig.cityModels;

            cityModels.Clear();
            string url = "http://www.city8.com/#cityaf";

            try
            {
                string context;
                using (HttpWebResponse hp = HttpHelper.CreateGetHttpResponse(url, 1000, "", null))
                {
                    context = HttpHelper.GetResponseString(hp);
                }

                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(context);  // load the HTML page
                HtmlNode navNode = htmlDoc.DocumentNode;
                HtmlAgilityPack.HtmlNodeCollection nodes = navNode.SelectNodes("//div[@class='v5_ll_test']/ul/li/a");
                foreach (HtmlNode htmlNode in nodes)
                {
                    CityModel model = new CityModel();
                    string    name  = htmlNode.InnerText.Trim();
                    model.name = name;
                    model.URL  = htmlNode.Attributes["href"].Value;
                    // Without this the list stays empty and an empty configuration is saved.
                    cityModels.Add(model);

                    if (this.cityRoadLoadLog != null)
                    {
                        string log     = "正在下载城市:" + name;
                        int    process = 100;
                        this.cityRoadLoadLog(log, process);
                    }
                }
                CityURLConfig.GetInstance().SaveConfig();
            }
            catch
            {
                MessageBox.Show("更新失败");
            }
        }
示例#14
0
        /// <summary>
        /// Loads the service page, caches its input and select elements, and reports the outcome.
        /// </summary>
        /// <param name="success">Invoked when the page loaded without a blocking message.</param>
        /// <param name="fail">
        /// Invoked with a description when the server reports an error, the page shows a blocking
        /// message, or the request itself fails.
        /// </param>
        public void Init(Action success, Action <string> fail)
        {
            string url = ServiceConfig.GetConfig(ServiceConfigType.DriveServicePostUrl) + GetUrlGetArgs();

            Http.Get(url).OnSuccess(content =>
            {
                Html.HtmlDocument document = new Html.HtmlDocument();
                document.LoadHtml(content);
                _inputs  = document.DocumentNode.SelectNodes("//input[@type='password' or @type='hidden' or @type='text' or @type='radio']");
                _selects = document.DocumentNode.SelectNodes("//select");

                if (ServiceError.TestServiceError(document.DocumentNode))
                {
                    fail("服务器出错!");
                }
                else
                {
                    var msgnode = document.DocumentNode.SelectSingleNode("/html[1]/body[1]/table[1]/tr[2]/td[1]/table[1]/tr[1]/td[2]/table[2]/tr[1]/td[1]/table[1]/tr[1]/td[2]/div[1]/span[1]");
                    // A message on the page is a failure unless it contains the marker text below.
                    if (msgnode != null && !String.IsNullOrEmpty(msgnode.InnerText) && msgnode.InnerText.IndexOf("您已约过班车", StringComparison.Ordinal) == -1)
                    {
                        fail(msgnode.InnerText);
                        return;
                    }
                    success();
                }
            }).TimeOut(5000).OnFail(new Action <WebException>((exp) =>
            {
                fail(exp.ToString());
            })).Go();
        }
示例#15
0
        /// <summary>
        /// Evaluates the XPath typed into <c>textBox1</c> against <c>htmlcode</c>. The matched
        /// texts, each followed by "-----", go to <c>textBox2</c> and the match count to <c>textBox</c>.
        /// </summary>
        /// <remarks>
        /// When nothing matches, or the expression cannot be evaluated, <c>textBox</c> shows "nothing found!".
        /// </remarks>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            textBox.Text  = "";
            textBox2.Text = "";
            string tag = textBox1.Text;

            HtmlAgilityPack.HtmlDocument mydoc = new HtmlAgilityPack.HtmlDocument();
            mydoc.LoadHtml(htmlcode);

            HtmlAgilityPack.HtmlNodeCollection nodeCol = null;
            try
            {
                nodeCol = mydoc.DocumentNode.SelectNodes(tag);
            }
            catch (Exception)
            {
                // The expression is user input; an invalid one is treated like "no match".
            }

            if (nodeCol == null)
            {
                textBox.Text = "nothing found!";
                return;
            }

            // Build the text once instead of re-assigning the control's Text for every node.
            System.Text.StringBuilder matchedText = new System.Text.StringBuilder();
            foreach (HtmlAgilityPack.HtmlNode node in nodeCol)
            {
                matchedText.Append(node.InnerText);
                matchedText.Append("-----");
            }

            textBox2.Text = matchedText.ToString();
            textBox.Text  = nodeCol.Count.ToString();
        }
示例#16
0
        /// <summary>
        /// Builds the scorecard details for one fixture row.
        /// </summary>
        /// <param name="cells">
        /// Cells of the row: index 1 holds the date, 4 the scorecard link, 5 the ground link and 6 the match code.
        /// </param>
        /// <param name="season">Season the row belongs to.</param>
        /// <returns>The details, or <c>null</c> when the teams are not of interest.</returns>
        private static ScorecardDetails GetDetails(HtmlNodeCollection cells, string season)
        {
            // The parse result is not checked: an unparseable date stays at default(DateTime).
            DateTime matchDate;
            DateTime.TryParse(cells[1].InnerText.Trim(), out matchDate);

            string scorecardUrl, homeTeam, awayTeam;
            ParseScorecardLink(cells[4].FirstChild, out scorecardUrl, out homeTeam, out awayTeam);
            if (!IsOfInterest(homeTeam, awayTeam))
            {
                return null;
            }

            string groundUrl, groundName;
            ParseGroundLink(cells[5].FirstChild, out groundUrl, out groundName);

            string matchCode = cells[6].InnerText;

            var details = new ScorecardDetails();
            details.Season = season;
            details.MatchCode = matchCode;
            details.HomeTeam = homeTeam;
            details.AwayTeam = awayTeam;
            details.GroundName = groundName;
            details.GroundUrl = groundUrl;
            details.Date = matchDate;
            details.LastChecked = DateTime.Now;
            details.ScorecardUrl = scorecardUrl;
            details.ScorecardAvailable = !string.IsNullOrEmpty(scorecardUrl);
            return details;
        }
示例#17
0
        /// <summary>
        /// Loads the page at <paramref name="url"/>, finds every author entry that has an e-mail
        /// link, and appends a "name - email" line per author to C:\Springer\Springer Emails.txt.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The lines that were written, in page order; empty when no author has an e-mail link.</returns>
        public static List<string> getNameOfEmail(string url)
        {
            List<string> a = new List<string>();
            HtmlWeb website = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = website.Load(url);
            HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//li[@itemprop='author']");

            if (!Directory.Exists(@"C:\Springer\"))
            {
                Directory.CreateDirectory(@"C:\Springer\");
            }

            using (StreamWriter outputFile = new StreamWriter(@"C:\Springer\Springer Emails.txt", true))
            {
                if (authors != null)
                {
                    foreach (HtmlNode author in authors)
                    {
                        HtmlNode Name = author.SelectSingleNode(".//a[@class='person']");
                        HtmlNode EMail = author.SelectSingleNode(".//a[@class='envelope']");

                        // The address is read from the e-mail link's title attribute.
                        if (EMail == null || EMail.Attributes["title"] == null)
                        {
                            continue;
                        }

                        // An author without a name link is still recorded, with an empty name.
                        string displayName = Name != null ? Name.InnerText : string.Empty;
                        string line = displayName + " - " + EMail.Attributes["title"].Value;

                        outputFile.WriteLine(line);
                        a.Add(line);
                    }
                }
            }

            return a;
        }
示例#18
0
        /// <summary>
        /// Posts a search request, parses the returned HTML, and writes one
        /// "name—count—price" entry per result block to the response.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the request headers below embed a fixed session cookie; confirm that is intended.
        /// </remarks>
        protected void Page_Load(object sender, EventArgs e)
        {
            string heads = @"Accept: application/json, text/javascript, */* q=0.01 " +
                           @"Accept-Encoding: gzip, deflate " +
                           @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " +
                           @"Connection: keep-alive " +
                           @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " +
                           @"Host: catdot.dianping.com " +
                           @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " +
                           @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";
            string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE";
            ClassHttpRequestClient s   = new ClassHttpRequestClient(true);
            HtmlDocument           doc = new HtmlDocument();
            string content             = "";
            string response            = s.httpPost(url, heads, content, Encoding.UTF8);

            // The downloaded page has to be parsed before it can be queried.
            if (!string.IsNullOrEmpty(response))
            {
                doc.LoadHtml(response);
            }

            StringBuilder sb = new StringBuilder();

            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            if (collection != null)
            {
                foreach (HtmlAgilityPack.HtmlNode item in collection)
                {
                    HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"txt\"]");
                    HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                    if (divtit == null || divcomment == null)
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]");
                    HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");
                    if (aname == null || anum == null || aprice == null)
                    {
                        // Skip blocks that lack one of the three expected links.
                        continue;
                    }

                    sb.AppendFormat("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText);
                }
            }

            Response.Write(sb);
        }
示例#19
0
        /// <summary>
        /// Builds the list of date headings for a schedule page: the week's start date first,
        /// followed by every date separator found on the page.
        /// </summary>
        /// <remarks>
        /// The first entry is the first three whitespace-separated parts of the page heading plus
        /// the last word of the last date separator. When the page has no date separators, the
        /// heading text is returned as the only entry, unmodified.
        /// Assumes the heading element is present on the page.
        /// </remarks>
        /// <param name="pageresult">Parsed schedule page.</param>
        /// <returns>The date headings described above.</returns>
        private static List <string> GetdateSeparatorList(HtmlAgilityPack.HtmlDocument pageresult)
        {
            string dateSeparatorXPath = "//*[contains(concat( \" \", @class, \" \" ), concat( \" \", \"w2g\", \" \" ))]";
            string daterangeXPath     = "//*[@id=\"ctl00_C_p\"]/div[@class=\"tvhead\"]/div[@class=\"tvheadtitle\"]/h2[@class=\"tvh2\"]";

            // Date separators, e.g. 2019. március 3. vasárnap. Typically there are 7 on a page.
            HtmlAgilityPack.HtmlNodeCollection dateSeparators = pageresult.DocumentNode.SelectNodes(dateSeparatorXPath);
            // Heading used to get the first day/date of the current week's schedule.
            HtmlAgilityPack.HtmlNode startdate = pageresult.DocumentNode.SelectSingleNode(daterangeXPath);

            List <string> dateSeparatorList = new List <string>();
            dateSeparatorList.Add(startdate.InnerText);

            if (dateSeparators == null)
            {
                return(dateSeparatorList);
            }

            // Fix up the first entry with the proper day of the week, taken from the last separator.
            string[] parts1 = startdate.InnerText.Split();
            string[] parts2 = dateSeparators[dateSeparators.Count - 1].InnerText.Split();
            dateSeparatorList[0] = string.Join(" ", String.Join(" ", parts1.Take(3).ToArray()), parts2[parts2.Length - 1]);

            foreach (HtmlAgilityPack.HtmlNode dateSeparator in dateSeparators)
            {
                dateSeparatorList.Add(dateSeparator.InnerText);
            }

            return(dateSeparatorList);
        }
 /// <summary>
 /// Downloads <paramref name="url"/> to the local file "0.htm", loads that file with code page
 /// 1251, and runs two fixed XPath queries against it.
 /// </summary>
 /// <param name="url">Address to download.</param>
 /// <param name="nodes">Receives the p elements under sections whose class is not 'promo__write_response'.</param>
 /// <param name="nodes2">Receives the b elements found beneath the rating information divs.</param>
 void Download(string url, ref HtmlNodeCollection nodes, ref HtmlNodeCollection nodes2)
 {
     const string localCopy = "0.htm";
     const string paragraphQuery = "//section[@class != 'promo__write_response']/p";
     const string ratingQuery = "//div[@class='card__responses__response__information__rating']/*/b";

     wc.DownloadFile(url, localCopy);
     doc.Load(localCopy, Encoding.GetEncoding(1251));

     var root = doc.DocumentNode;
     nodes = root.SelectNodes(paragraphQuery);
     nodes2 = root.SelectNodes(ratingQuery);
 }
示例#21
0
        /// <summary>
        /// Placeholder: walks the collection without using its items and always returns <c>null</c>.
        /// </summary>
        /// <param name="collection">Nodes that are enumerated; nothing is read from them.</param>
        /// <returns>Always <c>null</c>.</returns>
        public List<Article> ConverArticles(HtmlNodeCollection collection)
        {
            // NOTE(review): the loop body is empty, so no Article is ever produced.
            // This looks like an unfinished conversion; confirm before relying on it.
            foreach (var item in collection)
            {
            }

            return null;
        }
 /// <summary>
 /// Extracts the distinct training ids referenced by the links inside the calendar table.
 /// </summary>
 /// <param name="calendarTable">Nodes whose descendant a elements are inspected.</param>
 /// <returns>One id per distinct value, taken from every href that contains "TrainingID=".</returns>
 public List<int> ParseTraining(HtmlNodeCollection calendarTable)
 {
     var ids = from anchor in calendarTable.Descendants("a")
               where anchor.GetAttributeValue("href", "").Contains("TrainingID=")
               select GetTrainingIdFromUrl(anchor.GetAttributeValue("href", ""));

     return ids.Distinct().ToList();
 }
 /// <summary>
 /// Loads the HTML file at <paramref name="htmlPath"/>, remembers the path, and caches the
 /// result of selecting the document's body elements.
 /// </summary>
 /// <param name="htmlPath">Path of the HTML file to load.</param>
 public domTreeManagementViewModel(string htmlPath)
 {
     _ui_Element_List = new List<UIElementInfo>();

     _my_htmlDoc = new HtmlDocument();
     _my_htmlDoc.Load(htmlPath);
     _htmlPath = htmlPath;

     var root = _my_htmlDoc.DocumentNode;
     _body_htmlNodeCollection = root.SelectNodes("//body");
     //_scriptList
 }
 /// <summary>
 /// Reads the location details from five consecutive nodes. The source text looks like:
 /// Yükseklik:  28 m. Boylam:  29° 9' D Enlem:  40° 54' K Gün Batımı:  17:10 Gün Doğumu:  07:21
 /// which is mapped, in that order, to altitude, longitude, latitude, sunset and sunrise.
 /// </summary>
 /// <param name="locationInfoNodes">The html location information nodes; the value is taken from each node's last child.</param>
 /// <returns>The populated location information.</returns>
 private static LocationInfo GetLocationInfo(HtmlNodeCollection locationInfoNodes)
 {
     return new LocationInfo
     {
         Altitude = locationInfoNodes[0].LastChild.InnerText.RemoveNbsp(),
         Longitude = locationInfoNodes[1].LastChild.InnerText.RemoveNbsp().ReplaceQuoteAndDegree(),
         Latitude = locationInfoNodes[2].LastChild.InnerText.RemoveNbsp().ReplaceQuoteAndDegree(),
         Sunset = locationInfoNodes[3].LastChild.InnerText.RemoveNbsp(),
         Sunrise = locationInfoNodes[4].LastChild.InnerText.RemoveNbsp()
     };
 }
示例#25
0
        /// <summary>
        /// Parses <paramref name="contents"/> as HTML and returns the result of running
        /// <paramref name="xpathSearchString"/> against the document root.
        /// </summary>
        /// <param name="contents">HTML text to parse.</param>
        /// <param name="xpathSearchString">XPath expression to evaluate.</param>
        /// <returns>Whatever <c>SelectNodes</c> returns for the expression.</returns>
        static HtmlAgilityPack.HtmlNodeCollection SearchXpathMulti(string contents, string xpathSearchString)
        {
            // Make async in future ?
            var parsed = new HtmlDocument();
            parsed.LoadHtml(contents);

            return parsed.DocumentNode.SelectNodes(xpathSearchString);
        }
示例#26
0
        /// <summary>
        /// Extracts the page title and a selected fragment from the HTML held in <c>content</c>.
        /// </summary>
        /// <param name="xpath">
        /// "all" (in any casing) selects the whole document. A value starting with "/" is evaluated
        /// as an XPath expression. Any other value is matched against the id, or the exact class
        /// attribute, of the document's div elements.
        /// </param>
        /// <returns>
        /// The extracted content, or <c>null</c> when the requested container is not found.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>url</c> or <c>content</c> is not set.</exception>
        public HtmlContent GetContent(string xpath)
        {
            if (this.url == null || content == null)
            {
                throw new InvalidOperationException("call IContentExtractor.GotoPage first");
            }

            HtmlContent doc = new HtmlContent();
            doc.LastUpdatedDate = DateTime.Now;

            HtmlDocument htmldoc = new HtmlDocument();
            htmldoc.LoadHtml(content);

            HtmlAgilityPack.HtmlNode titleNode = htmldoc.DocumentNode.SelectSingleNode("//title");
            if (titleNode != null)
            {
                doc.Title = titleNode.InnerText;
            }

            // Ordinal comparisons: the selector is a keyword or path, not linguistic text.
            if (string.Equals(xpath, "all", StringComparison.OrdinalIgnoreCase))
            {
                doc.Content = htmldoc.DocumentNode.OuterHtml;
            }
            else
            {
                HtmlNode targetNode;
                if (!xpath.StartsWith("/", StringComparison.Ordinal))
                {
                    // Not a path: treat the value as a div id or class name.
                    HtmlAgilityPack.HtmlNodeCollection divs = htmldoc.DocumentNode.SelectNodes("//div");
                    if (divs == null)
                    {
                        return(null);
                    }
                    targetNode = divs.FirstOrDefault(a => a.Id == xpath || (a.Attributes["class"] != null && a.Attributes["class"].Value == xpath));
                }
                else
                {
                    // Starts with '/': evaluate as an XPath expression.
                    targetNode = htmldoc.DocumentNode.SelectSingleNode(xpath);
                }

                if (targetNode == null)
                {
                    if (Logger != null)
                    {
                        Logger.WarnFormat("[Mine Fails] '{1}' container not found - {0}", url, xpath);
                    }
                    return(null);
                }
                doc.Content = targetNode.OuterHtml;
            }

            doc.Url = url;
            return(doc);
        }
 /// <summary>
 /// Projects feed link nodes into feed data objects, reading the href, title and type attributes.
 /// </summary>
 /// <param name="feeds">Link nodes describing the feeds.</param>
 /// <returns>A lazily evaluated sequence with one object per node.</returns>
 private static IEnumerable<SyndicationFeedsDataObject> ConvertNodesToSyndicationFeeds(HtmlNodeCollection feeds)
 {
     return feeds.Select(link => new SyndicationFeedsDataObject
     {
         FeedUrl = link.Attributes["href"].Value,
         Title = link.Attributes["title"].Value,
         MimeType = link.Attributes["type"].Value
     });
 }
示例#28
0
        /// <summary>
        /// Verifies that every image referenced by the crawled page exists, when the
        /// <c>ImagesExist</c> check is enabled.
        /// </summary>
        /// <remarks>
        /// Each distinct src value is checked only once across calls; values already present in
        /// <c>imagesChecked</c> are skipped. Missing images are logged as errors.
        /// </remarks>
        /// <param name="crawledPage">Page whose img elements are inspected.</param>
        /// <returns><c>false</c> when at least one newly checked image is missing; otherwise <c>true</c>.</returns>
        private bool CheckImages(CrawledPage crawledPage)
        {
            if (!Tests.HasFlag(DocumentChecks.ImagesExist))
            {
                return(true);
            }

            HtmlDocument pageDocument = new HtmlDocument();
            pageDocument.LoadHtml(crawledPage.Content.Text);

            HtmlAgilityPack.HtmlNodeCollection images =
                pageDocument.DocumentNode.SelectNodes(@"//img[@src]");
            if (null == images)
            {
                return(true);
            }

            bool allPresent = true;

            foreach (var image in images)
            {
                string src = image.Attributes["src"].Value;
                if (imagesChecked.Contains(src))
                {
                    continue;
                }

                string baseUrl = string.Format(
                    CultureInfo.InvariantCulture,
                    "{0}://{1}",
                    crawledPage.Uri.Scheme,
                    crawledPage.Uri.Host);
                string imageUrl = GetAbsoluteUrlString(baseUrl, src);

                if (!URLExists(new Uri(imageUrl)))
                {
                    allPresent = false;

                    string message = string.Format(
                        CultureInfo.InvariantCulture,
                        "image missing: {0} in {1}",
                        imageUrl,
                        crawledPage.Uri.AbsoluteUri);
                    Log.Error(CultureInfo.InvariantCulture, m => m(
                                  message));
                }

                // Remember the value whether or not the image was found.
                imagesChecked.Add(src);
            }

            return(allPresent);
        }
		/// <summary>
		/// Translates every command node and joins the results, one per line, each indented by three tabs.
		/// </summary>
		/// <param name="commandNodes">Nodes to translate, in output order.</param>
		/// <returns>The concatenated translation; empty when the collection has no nodes.</returns>
		public string Translate (HtmlNodeCollection commandNodes)
		{
			// A builder avoids re-copying the whole output for every appended node.
			var output = new System.Text.StringBuilder ();
			
			foreach (HtmlNode node in commandNodes) {
				output.Append ("\t\t\t"); // three tabs should indent the code appropriately
				output.Append (Translate (node));
				output.Append ("\n");
			}
			
			return output.ToString ();
		}
        /// <summary>
        /// Selects the nodes matching the <see cref="XPath"/> expression, evaluated from this node.
        /// </summary>
        /// <param name="xpath">The XPath expression.</param>
        /// <returns>An <see cref="HtmlNodeCollection"/> with the nodes matching the <see cref="XPath"/> query, or <c>null</c> if no node matched the XPath expression.</returns>
        public HtmlNodeCollection SelectNodes(string xpath)
        {
            var matches = new HtmlNodeCollection(null);

            var iterator = new HtmlNodeNavigator(OwnerDocument, this).Select(xpath);
            while (iterator.MoveNext())
            {
                var position = (HtmlNodeNavigator)iterator.Current;
                matches.Add(position.CurrentNode);
            }

            // Callers receive null, not an empty collection, when nothing matched.
            if (matches.Count == 0)
            {
                return null;
            }

            return matches;
        }
示例#31
0
 /// <summary>
 /// Replaces the inner HTML of every node with the data value named by its data-bind attribute,
 /// then removes that attribute.
 /// </summary>
 /// <param name="nodes">Nodes carrying a data-bind attribute; <c>null</c> is ignored.</param>
 /// <param name="data">Object the values are looked up on.</param>
 private void ProcessDataBinds(HtmlNodeCollection nodes, dynamic data)
 {
     if (nodes == null)
     {
         return;
     }

     foreach (var boundNode in nodes)
     {
         var propName = boundNode.Attributes.First(attribute => attribute.Name == "data-bind").Value;
         // TODO: Support formatter as in data-format="{value}[, ]" style.
         boundNode.InnerHtml = GetDataValue(data, propName);
         boundNode.Attributes.Remove("data-bind");
     }
 }
示例#32
0
        /// <summary>
        /// Builds one tab-separated line per example row.
        /// </summary>
        /// <param name="examples">Rows holding a <c>td.left</c> and a <c>td.right</c> cell.</param>
        /// <param name="labels">Text appended as the last field of every line.</param>
        /// <returns>
        /// All lines concatenated; each line is left text, left text again, right text and
        /// <paramref name="labels"/>, separated by tabs and terminated by a line feed.
        /// </returns>
        public string Parse(HtmlNodeCollection examples, string labels)
        {
            string lines = String.Empty;

            foreach (var row in examples)
            {
                var leftCell = row.SelectSingleNode("td[@class='left']/p/span");
                var rightCell = row.SelectSingleNode("td[@class='right']/p/span");

                var en = getInnerHtml(leftCell);
                var ru = getInnerHtml(rightCell);

                // NOTE(review): the left-hand text is written twice per line; confirm this is the intended column layout.
                lines = string.Concat(lines, en, "\t", en, "\t", ru, "\t", labels, "\n");
            }

            return lines;
        }
示例#33
0
 /// <summary>
 /// Reads the value of the attribute called <paramref name="name"/> from the first node
 /// of <paramref name="collection"/> without throwing when it is missing.
 /// </summary>
 /// <param name="collection">Nodes to inspect; may be <c>null</c> or empty.</param>
 /// <param name="name">Exact attribute name to look for.</param>
 /// <returns>
 /// The attribute value, or an empty string when the collection is <c>null</c> or empty,
 /// or when the first node does not carry the attribute.
 /// </returns>
 private string SafeGetAttributeValue(HtmlNodeCollection collection, string name)
 {
     // A null collection is treated as "nothing found" so the helper never throws on it.
     if (collection == null)
     {
         return string.Empty;
     }

     // Only the first node is consulted: the projection yields that node's attribute
     // (or null), and FirstOrDefault takes that first projected element.
     var attr =
         collection
         .Select(n => n.Attributes.FirstOrDefault(a => a.Name == name))
         .FirstOrDefault();

     return attr != null ? attr.Value : string.Empty;
 }
示例#34
0
 /// <summary>
 /// Runs <c>AnalyzeNode</c> on every node and appends each non-empty result to <paramref name="addTo"/>.
 /// </summary>
 /// <param name="addTo">List that receives the non-empty results.</param>
 /// <param name="col2">Nodes to analyze; <c>null</c> is ignored.</param>
 private void AnalyzeNodes(List<string> addTo, HtmlNodeCollection col2)
 {
     if (col2 == null)
     {
         return;
     }

     foreach (HtmlNode current in col2)
     {
         string analyzed = AnalyzeNode(current);
         if (analyzed == "")
         {
             continue;
         }

         addTo.Add(analyzed);
     }
 }
        /// <summary>
        /// Downloads the "G70" page below <paramref name="url"/> and collects the text of every
        /// link matching <c>//div[@class='sortC']/dl/dd/a</c> as a road name.
        /// </summary>
        /// <param name="url">Base address; a trailing slash is tolerated.</param>
        /// <param name="modeName">City name stored on the result and used in log messages.</param>
        /// <param name="totalCount">Divisor used when computing the progress percentage.</param>
        /// <returns>
        /// A <c>CityRoad</c> whose <c>Roads</c> holds the names found. Any exception is logged
        /// through log4net and the roads gathered so far are returned.
        /// </returns>
        public CityRoad GetRoadsByCityName(string url, string modeName, int totalCount)
        {
            CityRoad road = new CityRoad();

            road.cityName = modeName;

            string tempUrl = String.Format("{0}/G70/", url.TrimEnd('/'));

            try
            {
                // A direct cast fails with a descriptive InvalidCastException for non-HTTP URLs
                // instead of a NullReferenceException on the next line; both end up in the catch below.
                var request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(tempUrl);
                request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

                request.UserAgent =
                    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";
                request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";

                request.Timeout = 50000;

                // Dispose response, stream and reader even when reading fails.
                string context;
                using (var response = request.GetResponse())
                using (var stream = response.GetResponseStream())
                using (var reader = new System.IO.StreamReader(stream))
                {
                    context = reader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(context); // load the HTML page
                HtmlNode navNode = htmlDoc.DocumentNode;
                HtmlAgilityPack.HtmlNodeCollection nodes =
                    navNode.SelectNodes("//div[@class='sortC']/dl/dd/a");
                if (nodes == null)
                {
                    return(road);
                }
                foreach (HtmlNode htmlNode in nodes)
                {
                    string name = htmlNode.InnerText.Trim();
                    road.Roads.Add(name);
                    if (this.cityRoadLoadLog != null)
                    {
                        // NOTE(review): k is declared outside this method and is not changed here — confirm who advances it.
                        string log     = "正在下载城市:" + modeName + ",道路:" + name;
                        int    process = k * 100 / totalCount;
                        this.cityRoadLoadLog(log, process);
                    }
                }
            }
            catch (Exception ex)
            {
                log4net.LogManager.GetLogger(this.GetType())
                .ErrorFormat("{0}请求路网名称失败,请求地址:{1},错误信息:{2}", modeName, tempUrl, ex);
            }

            return(road);
        }
示例#36
0
        /// <summary>
        /// Parses the HTML data.
        /// </summary>
        /// <param name="resultStr">HTML markup to parse.</param>
        private void GetClassElenemt(string resultStr)
        {
            // JS call style 1: used when a return value is needed.
            // Reads the player's own balance.
            var task = this.webBrowser1.GetBrowser().MainFrame.EvaluateScriptAsync("(function() { return document.getElementsByClassName('_2HLqr')[0].innerText;})();", null);

            task.ContinueWith(t =>
            {
                if (!t.IsFaulted)
                {
                    var response             = t.Result;
                    EvaluateJavaScriptResult = response.Success ? (response.Result ?? "null") : response.Message;
                    // NOTE(review): the formatted amount is computed but not used afterwards.
                    var my = GetFormatMoney(EvaluateJavaScriptResult.ToString());
                }
            });

            // HTML parser
            hap.HtmlDocument htmlDocument = new hap.HtmlDocument();
            htmlDocument.LoadHtml(resultStr);
            hap.HtmlNodeCollection formNodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='_3a3_c']");
            if (formNodes == null)
            {
                return;
            }
            // Parse each of the rooms that were captured.
            // NOTE(review): index is never advanced, so every item checks slot 0 — confirm intent.
            int index = 0;

            foreach (var item in formNodes)
            {
                // ".//" keeps the search inside the current item; a leading "//" would
                // search from the document root and return the same node for every item.
                var time  = item.SelectSingleNode(".//div[@class='_2rb_p']");
                var state = item.SelectSingleNode(".//div[@class='_1sIzQ']");
                // Test: pass the C# node into JS and modify the node's value there.
                if (time == null)
                {
                    return;
                }
                // A bet has already been placed in this area.
                if (this.isBet1[index])
                {
                    return;
                }
                this.isBet1[index] = true;
                // Place the bet
                // Choose the chips
                //                   webBrowser1.Document.GetElementById("").RaiseEvent("click");
                //                 // Choose banker / player
                //               webBrowser1.Document.GetElementById("").RaiseEvent("click");
                //             // Confirm
                //           webBrowser1.Document.GetElementById("").RaiseEvent("click");
            }
        }
示例#37
0
        /// <summary>
        /// Starts a background task that loads the page whose address is in <c>textBox1</c>
        /// and stores three XPath query results in <c>nazwa</c>, <c>n_cena</c> and <c>s_prom</c>.
        /// </summary>
        /// <returns>The task performing the download and the queries.</returns>
        public Task wyszukiwanie()
        {
            // Read the control on the calling thread; UI controls should not be
            // touched from the worker thread that runs the task body.
            string address = textBox1.Text;

            return Task.Factory.StartNew(() =>
                {
                    HtmlWeb hw = new HtmlWeb();
                    HtmlAgilityPack.HtmlDocument doc = hw.Load(address);
                    nazwa = doc.DocumentNode.SelectNodes("//ul[@class=\"product_list float\"]/li/div[3]/a[2]/text()");
                    n_cena = doc.DocumentNode.SelectNodes("//ul[@class=\"product_list float\"]/li/div[3]/div[1]/span[3]/text()");
                    s_prom = doc.DocumentNode.SelectNodes("//ul[@class=\"product_list float\"]/li/div[3]/div[1]/span[2]/span[2]/text()");
                });
        }
示例#38
0
        /// <summary>
        /// Selects a list of nodes matching the <see cref="XPath"/> expression.
        /// </summary>
        /// <param name="xpath">The XPath expression.</param>
        /// <returns>An <see cref="HtmlNodeCollection"/> containing the nodes matching the <see cref="XPath"/> query. The collection is empty, never <c>null</c>, when no node matched.</returns>
        public HtmlNodeCollection SelectNodes(string xpath)
        {
            HtmlNodeCollection list = new HtmlNodeCollection(null);
            HtmlNodeNavigator nav = new HtmlNodeNavigator(OwnerDocument, this);

            XPathNodeIterator it = nav.Select(xpath);
            while (it.MoveNext())
            {
                HtmlNodeNavigator n = (HtmlNodeNavigator)it.Current;
                list.Add(n.CurrentNode);
            }

            // Unlike variants that return null for "no match", this one always hands back the list.
            return list;
        }
        /// <summary>
        /// Checks <c>GetLinks</c> against a null collection and against the anchors of a known test page.
        /// </summary>
        public void GetLinksReturnsCorrectValue()
        {
            // A null collection must not throw and must give an empty list.
            Assert.Equal(0, GetLinks(null).Count);

            // Collect every anchor with an href from the predefined test page.
            HtmlDocument html = new HtmlDocument();
            Page testPage = PageDocumentTestHelper.GetTestPage(3);
            html.LoadHtml(testPage.Content);

            HtmlNodeCollection anchors = new HtmlNodeCollection(html.DocumentNode);
            foreach (HtmlNode anchor in html.DocumentNode.SelectNodes("//a[@href]"))
            {
                anchors.Add(anchor);
            }

            Assert.Equal(129, GetLinks(anchors).Count);
        }
示例#40
0
 /// <summary>
 /// Writes the inner text of each node to the console, or "is null" when no collection was given.
 /// </summary>
 /// <param name="o">The nodes to print; may be <c>null</c>.</param>
 static void Print(HtmlNodeCollection o)
 {
     if (o == null)
     {
         Console.WriteLine("is null");
         return;
     }

     foreach (var entry in o)
     {
         Console.WriteLine(entry.InnerText);
     }
 }
 /// <summary>
 /// Removes, in place, every node named <c>#text</c> from the collection.
 /// </summary>
 /// <param name="collection">The collection to filter; it is modified directly.</param>
 /// <returns>The same <paramref name="collection"/> instance.</returns>
 private static HtmlNodeCollection removeTextNodes (HtmlNodeCollection collection)
 {
     int position = 0;
     while (position < collection.Count)
     {
         if (collection[position].Name == "#text")
         {
             // Stay on the same index: the following element has shifted into it.
             collection.RemoveAt(position);
         }
         else
         {
             position++;
         }
     }
     return collection;
 }
示例#42
0
 /// <summary>
 /// Appends the node's trimmed inner text to <paramref name="result"/>. When the node has no
 /// inner text and is one of <paramref name="inputs"/>, appends a space followed by its
 /// trimmed <c>value</c> attribute instead.
 /// </summary>
 /// <param name="node">The node whose text is wanted.</param>
 /// <param name="result">Builder receiving the text.</param>
 /// <param name="inputs">Nodes for which the <c>value</c> attribute serves as fallback text.</param>
 private void GetNodeText(HtmlNode node, StringBuilder result, HtmlNodeCollection inputs)
 {
     string innerText = node.InnerText;
     if (!string.IsNullOrEmpty(innerText))
     {
         result.Append(innerText.Trim());
         return;
     }

     if (!inputs.Contains(node))
     {
         return;
     }

     HtmlAttribute valueAttribute = node.Attributes["value"];
     if (valueAttribute != null)
     {
         result.Append(" " + valueAttribute.Value.Trim());
     }
 }
示例#43
0
        /// <summary>
        /// Returns the direct inner text of every node matched by the given XPath expression.
        /// </summary>
        /// <param name="str">The XPath expression, evaluated against <c>webPage</c>.</param>
        /// <returns>
        /// The non-empty direct inner texts of the matched nodes, in document order;
        /// an empty array when nothing matched.
        /// </returns>
        public string[] getXPath(string str)
        {
            HtmlAgilityPack.HtmlNodeCollection nodes = webPage.DocumentNode.SelectNodes(str);
            List <String> list = new List <string>();

            // SelectNodes yields null rather than an empty collection when nothing matches.
            if (nodes == null)
            {
                return(list.ToArray());
            }

            foreach (HtmlNode n in nodes)
            {
                string txt = n.GetDirectInnerText();
                // The previous test (txt != null || txt != "") was always true,
                // so null and empty texts were never filtered out.
                if (!string.IsNullOrEmpty(txt))
                {
                    list.Add(txt);
                }
            }
            return(list.ToArray());
        }
示例#44
0
        /// <summary>
        /// Downloads and parses the page at <paramref name="pageUrl"/> and queries its
        /// <c>table[@data-test]</c> elements.
        /// </summary>
        /// <param name="pageUrl">Address passed to <c>GetHtmlText</c>.</param>
        /// <returns>
        /// Currently always <c>null</c>: the matched tables are not inspected yet.
        /// </returns>
        public string getMidiLink(string pageUrl)
        {
            string htmlText = GetHtmlText(pageUrl);
            string midiLink = null;

            Hap.HtmlDocument htmDoc = new Hap.HtmlDocument();
            htmDoc.LoadHtml(htmlText);

            // NOTE(review): extraction of the link from these tables is not implemented;
            // the query is kept as the starting point for that work.
            Hap.HtmlNodeCollection targetElements = htmDoc.DocumentNode.SelectNodes("//table[@data-test]");

            return(midiLink);
        }
 /// <summary>
 /// Builds a <c>Movie</c> for each watchlist entry from its <c>h3/a</c> link.
 /// </summary>
 /// <param name="rawMoviesCollection">Parent nodes, each containing an <c>h3/a</c> element.</param>
 /// <returns>
 /// The set of movies; the title is the link text and the URL is the link's
 /// <c>href</c> appended to the site root.
 /// </returns>
 private static HashSet<Movie> ExtractWatchlistMoviesFromHtml(HtmlNodeCollection rawMoviesCollection)
 {
     const string mainSiteUrl = "http://www.nziff.co.nz";
     var collected = new HashSet<Movie>();

     foreach (var entry in rawMoviesCollection)
     {
         var titleLink = entry.SelectSingleNode("h3/a");
         var relativePath = titleLink.Attributes["href"].Value;

         collected.Add(new Movie
         {
             Title = titleLink.InnerText,
             WebsiteUrl = new Uri(mainSiteUrl + relativePath)
         });
     }

     return collected;
 }
示例#46
0
        /// <summary>
        /// Collects the non-blank <c>href</c> values of the given nodes.
        /// </summary>
        /// <param name="nodes">Nodes to read; may be <c>null</c>.</param>
        /// <returns>
        /// The <c>href</c> values in node order. Nodes without an <c>href</c> attribute, or
        /// with a blank one, are skipped. The list is empty when <paramref name="nodes"/> is <c>null</c>.
        /// </returns>
        private List<string> GetLinks(HtmlNodeCollection nodes)
        {
            List<string> hrefs = new List<string>();

            if (nodes == null)
                return hrefs;

            foreach (HtmlNode node in nodes)
            {
                // The attribute indexer yields null for a missing attribute; skip such
                // nodes instead of failing on .Value.
                var href = node.Attributes["href"];
                if (href == null)
                    continue;

                string hrefValue = href.Value;
                if (!string.IsNullOrWhiteSpace(hrefValue))
                    hrefs.Add(hrefValue);
            }

            return hrefs;
        }
 /// <summary>
 /// Replaces the category's sub-category list with one <c>RssLink</c> per node in <paramref name="subcats"/>.
 /// </summary>
 /// <param name="category">Category receiving the sub-categories; also set as their parent.</param>
 /// <param name="subcats">Nodes that each contain an <c>a</c> child describing one sub-category.</param>
 private void AddSubcats(Category category, HtmlNodeCollection subcats)
 {
     category.HasSubCategories = true;
     category.SubCategories = new List<Category>();

     foreach (HtmlNode entry in subcats)
     {
         HtmlNode anchor = entry.SelectSingleNode("a");

         // The display name comes from the anchor's second child node.
         var displayName = HttpUtility.HtmlDecode(anchor.ChildNodes[1].InnerText);
         var targetUrl = FormatDecodeAbsolutifyUrl(baseUrl, anchor.Attributes["href"].Value, null, UrlDecoding.None);

         category.SubCategories.Add(new RssLink()
         {
             Name = displayName,
             Url = targetUrl,
             ParentCategory = category
         });
     }

     category.SubCategoriesDiscovered = true;
 }
示例#48
0
        /// <summary>
        /// Loads the HTML file at <paramref name="fileName"/> as UTF-8 and passes all of its
        /// <c>a</c> elements to the node-collection overload of <c>getCompanyInfor2</c>.
        /// </summary>
        /// <param name="fileName">Path of the HTML file to read.</param>
        private void getCompanyInfor2(string fileName)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.OptionAutoCloseOnEnd = false;  // auto-close at the end (?)
            doc.OptionCheckSyntax    = false;  // syntax checking
            doc.OptionFixNestedTags  = true;   // handling for missing closing tags

            // using guarantees both the reader and the file handle are released even if Load throws.
            using (FileStream fs = new FileStream(fileName, FileMode.Open))
            using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
            {
                doc.Load(sr);
            }

            HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//a");
            if (nodes != null)
            {
                getCompanyInfor2(nodes, fileName);
            }
        }
示例#49
0
        /// <summary>
        /// Downloads the <c>road/A/</c> … <c>road/Z/</c> pages below <paramref name="url"/> and collects
        /// the text of every link matching <c>//div[@class='road_sahngjia road_zm_list']/a</c>.
        /// </summary>
        /// <param name="url">Base address; the letter path is appended directly to it.</param>
        /// <param name="modeName">City name stored on the result and used in log messages.</param>
        /// <param name="totalCount">Divisor used when computing the progress percentage.</param>
        /// <returns>
        /// A <c>CityRoad</c> whose <c>Roads</c> holds the names found. A failure on one letter
        /// is logged through log4net and the remaining letters are still processed.
        /// </returns>
        public CityRoad GetRoadsByCityName(string url, string modeName, int totalCount)
        {
            CityRoad result = new CityRoad();
            result.cityName = modeName;

            foreach (char letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
            {
                string pageUrl = String.Format("{0}road/{1}/", url, letter);
                try
                {
                    string html = "";
                    using (WebClient downloader = new WebClient())
                    {
                        downloader.Encoding = new System.Text.UTF8Encoding();
                        html                = downloader.DownloadString(pageUrl);
                    }

                    HtmlAgilityPack.HtmlDocument page = new HtmlAgilityPack.HtmlDocument();
                    page.LoadHtml(html);  // load the HTML page
                    HtmlNode root = page.DocumentNode;
                    HtmlAgilityPack.HtmlNodeCollection links = root.SelectNodes("//div[@class='road_sahngjia road_zm_list']/a");

                    // No matching links for this letter: nothing to add.
                    if (links != null)
                    {
                        foreach (HtmlNode link in links)
                        {
                            string roadName = link.InnerText.Trim();
                            result.Roads.Add(roadName);

                            if (this.cityRoadLoadLog == null)
                            {
                                continue;
                            }

                            string message = "正在下载城市:" + modeName + ",道路:" + roadName;
                            int    percent = k * 100 / totalCount;
                            this.cityRoadLoadLog(message, percent);
                        }
                    }
                }
                catch (Exception ex)
                {
                    log4net.LogManager.GetLogger(this.GetType()).ErrorFormat("{0}请求路网名称失败,请求地址:{1},错误信息:{2}", modeName, pageUrl, ex);
                }
            }

            return(result);
        }
示例#50
0
        /// <summary>
        /// Downloads the Lotto 6/49 history page, converts its history table into a
        /// <c>DataTable</c> and binds that table to <c>dgvLotto</c>.
        /// </summary>
        private void GetDataLotto()
        {
            DataTable lottoTable = new DataTable();

            memoryStream = new MemoryStream(webClient.DownloadData(@"https://www.taiwanlottery.com.tw/Lotto/Lotto649/history.aspx"));
            HAP.HtmlDocument pageDoc = new HAP.HtmlDocument();
            pageDoc.Load(memoryStream, Encoding.UTF8);

            // Re-parse the inner markup of the history table as a document of its own.
            HAP.HtmlDocument historyDoc = new HAP.HtmlDocument();
            HAP.HtmlNode historyTable = pageDoc.DocumentNode.SelectSingleNode(@"//table[@id='Lotto649Control_history_dlQuery']");
            historyDoc.LoadHtml(historyTable.InnerHtml);

            // Add the columns, taken from the header cells of the first result table.
            HAP.HtmlDocument headerDoc = new HAP.HtmlDocument();
            HAP.HtmlNode firstResultTable = historyDoc.DocumentNode.SelectSingleNode(@"//table[@class='table_org td_hm']");
            headerDoc.LoadHtml(firstResultTable.InnerHtml);

            foreach (HAP.HtmlNode headerCell in headerDoc.DocumentNode.SelectNodes(@"//tr[1]/td[@class='td_org1']"))
            {
                string columnName = headerCell.InnerText.Trim();
                lottoTable.Columns.Add(columnName);
            }

            lottoTable.Columns.Add("獎號");
            lottoTable.Columns.Add("特別號");

            // Add one row per draw.
            foreach (HAP.HtmlNode drawRow in historyDoc.DocumentNode.SelectNodes(@"/tr/td/table/tr[2]"))
            {
                string[] drawCells = drawRow.SelectNodes(@"td[@class='td_w']").Select(cell => cell.InnerText.Trim()).ToArray();
                lottoTable.Rows.Add(drawCells);
            }

            // Fill the two number columns; the n-th number row belongs to the n-th draw row.
            HAP.HtmlNodeCollection numberRows = historyDoc.DocumentNode.SelectNodes(@"/tr/td/table/tr[5]");
            int rowIndex = 0;
            foreach (HAP.HtmlNode numberRow in numberRows)
            {
                string[] regularNumbers = numberRow.SelectNodes(@"td[@class='td_w font_black14b_center']").Select(cell => cell.InnerText.Trim()).ToArray();
                string   winningNumbers = string.Join(" ", regularNumbers);                                                             // winning numbers
                string   specialNumber  = numberRow.SelectSingleNode(@"td[@class='td_w font_red14b_center']").InnerText.Trim();         // special number

                lottoTable.Rows[rowIndex]["獎號"]  = winningNumbers;
                lottoTable.Rows[rowIndex]["特別號"] = specialNumber;
                rowIndex++;
            }

            this.dgvLotto.DataSource = new BindingSource(lottoTable, null);
            this.dgvLotto.ColumnHeadersDefaultCellStyle.WrapMode = DataGridViewTriState.False;
            this.dgvLotto.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.AllCells;
        }
        /// <summary>
        /// Downloads the page for <paramref name="word"/> and concatenates the text of the
        /// first example nodes, with the <c>&amp;ensp;</c> and <c>&amp;#9776;</c> entities removed.
        /// </summary>
        /// <param name="word">Text appended to <c>_siteUrl</c> to form the request address.</param>
        /// <returns>
        /// The concatenated example text (at most <c>AppSettings.Instance.MaxExample</c> nodes),
        /// or an empty string when nothing matched or the request failed.
        /// </returns>
        private async Task <string> GetResultAsync(string word)
        {
            string result  = "";
            var    htmlDoc = new HtmlDocument();

            int count = 1;

            try
            {
                string response;
                // Dispose the client so its connection resources are released after the call.
                using (var client = new HttpClient())
                {
                    response = await client.GetStringAsync(_siteUrl + word);
                }

                htmlDoc.LoadHtml(response);
                HtmlAgilityPack.HtmlNodeCollection translateList = htmlDoc.DocumentNode.SelectNodes(_xPathQueryList);

                if (translateList != null && translateList.Count > 0)
                {
                    var exampleText = "";

                    foreach (var item in translateList)
                    {
                        if (count > AppSettings.Instance.MaxExample)
                        {
                            break;
                        }
                        //" You can go now if you like.&ensp;  Если хотите, можете идти.&ensp;&#9776; "
                        exampleText += item.InnerText;
                        count++;
                    }

                    // Both patterns are plain literals, so an ordinal string replace is enough.
                    result = exampleText.Replace("&ensp;", "").Replace("&#9776;", "");
                }
            }
            catch (HttpRequestException)
            {
                // Request failed (e.g. 404): deliberately fall through and return the empty result.
            }

            return(result);
        }
示例#52
0
        /// <summary>
        /// Extracts the stream address of a channel from the third <c>script</c> element of its index page.
        /// The page is read from a per-hour cache file in the temp directory when one exists,
        /// otherwise it is downloaded and written to that file.
        /// </summary>
        /// <param name="channel">Channel whose <c>Name</c> keys the cache file and whose <c>IndexFeed</c> is downloaded.</param>
        /// <returns>
        /// <c>https:</c> followed by the fourth double-quote-delimited segment of the script line
        /// containing <c>index.m3u8</c>, with backslashes removed.
        /// </returns>
        /// <remarks>
        /// Exceptions are not handled here: a missing script element or an unexpected line
        /// layout surfaces to the caller as the underlying exception.
        /// </remarks>
        public string GetChannelURI(Channel channel)
        {
            string streamURL        = string.Empty;
            string scriptXpath      = "/html/body/script[3]";
            string pathToCachedFile = Path.Combine(Path.GetTempPath(), "MagyarTV-StreamURL-" + channel.Name + "-" + DateTime.Now.ToString("yyyy-dd-M-HH") + ".html"); // Web page is fetched at the very least on top of each hour

            HtmlAgilityPack.HtmlDocument htmlDocument = HtmlAgilityPackEx.LoadFromCachedHtmlFile(pathToCachedFile);

            if (htmlDocument == null) // If there is no cached file, load it from the given URL
            {
                HtmlWeb browser = new HtmlWeb();
                htmlDocument = browser.Load(channel.IndexFeed);

                // using closes the cache file even when Save throws.
                using (FileStream sw = new FileStream(pathToCachedFile, FileMode.Create))
                {
                    htmlDocument.Save(sw);
                }
            }

            HtmlAgilityPack.HtmlNodeCollection script = htmlDocument.DocumentNode.SelectNodes(scriptXpath);

            string   currentline = String.Empty;
            string[] lines       = script[0].InnerText.Split(new string[] { "\n" }, System.StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < lines.Length; i++)
            {
                currentline = lines[i].Trim();
                if (currentline.Contains("index.m3u8"))
                {
                    break;
                }
            }

            // When no line mentions index.m3u8, currentline is simply the last line examined.
            string[] url_parts = currentline.Split('"');
            streamURL = string.Format("https:{0}", url_parts[3].Replace("\\", ""));

            return(streamURL);
        }
示例#53
0
        /// <summary>
        /// Removes every <c>script</c> element from the document.
        /// </summary>
        /// <param name="webDocument">The document to strip; it is modified in place.</param>
        /// <returns>The same <paramref name="webDocument"/> instance.</returns>
        public static HtmlAgilityPack.HtmlDocument RemoveScripts(HtmlAgilityPack.HtmlDocument webDocument)
        {
            HtmlAgilityPack.HtmlNodeCollection scriptNodes = webDocument.DocumentNode.SelectNodes("//script");

            // A null result means the document contains no script elements.
            if (scriptNodes != null)
            {
                foreach (HtmlNode scriptNode in scriptNodes)
                {
                    scriptNode.Remove();
                }
            }

            return(webDocument);
        }
示例#54
0
        /// <summary>
        /// Downloads the page for <paramref name="word"/> and returns the text of the first
        /// example nodes, one per line.
        /// </summary>
        /// <param name="word">Text appended to <c>_siteUrl</c> to form the request address.</param>
        /// <returns>
        /// The trimmed example texts (at most <c>AppSettings.Instance.MaxExample</c>) joined with
        /// line feeds, or an empty string when nothing matched or the request failed.
        /// </returns>
        private async Task <string> GetResultAsync(string word)
        {
            string result  = "";
            var    htmlDoc = new HtmlDocument();

            int count = 1;

            try
            {
                string response;
                // Dispose the client so its connection resources are released after the call.
                using (var client = new HttpClient())
                {
                    response = await client.GetStringAsync(_siteUrl + word);
                }

                htmlDoc.LoadHtml(response);
                HtmlAgilityPack.HtmlNodeCollection translateList = htmlDoc.DocumentNode.SelectNodes(_xPathQueryList);

                if (translateList != null && translateList.Count > 0)
                {
                    List <string> examplesList = new List <string>();

                    foreach (var item in translateList)
                    {
                        if (count > AppSettings.Instance.MaxExample)
                        {
                            break;
                        }

                        // NOTE(review): in a regex the parentheses form a group, so this pattern
                        // matches the text without parentheses — confirm that is what was meant.
                        examplesList.Add(Regex.Replace(item.InnerText, "&СИМВОЛ(10)&", "").Trim());

                        count++;
                    }

                    result = string.Join("\n", examplesList);
                }
            }
            catch (HttpRequestException)
            {
                // Request failed (e.g. 404): deliberately fall through and return the empty result.
            }

            return(result);
        }
示例#55
0
        /// <summary>
        /// Posts <paramref name="postdata"/> to <c>loginUrl</c>, extracts every <c>form</c> element
        /// from the response body and stores the resulting <c>FormData</c> array in <c>formdatas</c>.
        /// </summary>
        /// <param name="postdata">The data to post.</param>
        /// <remarks>
        /// Nothing is returned; the response is kept in <c>firstResponse</c> and the extracted forms in
        /// <c>formdatas</c> (an empty array when the page has no form).
        /// </remarks>
        public void searchForms(String postdata)
        {
            firstResponse = getResponse(loginUrl, postdata, null, null);

            // (2) Load the HTML into an HtmlDocument.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.OptionAutoCloseOnEnd = false;  // auto-close at the end (?)
            doc.OptionCheckSyntax    = false;  // syntax checking
            doc.OptionFixNestedTags  = true;   // handling for missing closing tags
            // NOTE(review): ElementsFlags is static, so this affects every document parsed
            // afterwards; presumably needed so that form elements keep their children — confirm.
            HtmlNode.ElementsFlags.Remove("form");

            doc.LoadHtml(firstResponse.body);

            /*
             * System.Diagnostics.Debug.Listeners.Add(new System.Diagnostics.TextWriterTraceListener(Console.Out));
             * System.Diagnostics.Debug.Write(firstResponse.body);
             * DefaultTraceListener dtl = (DefaultTraceListener)Debug.Listeners["Default"];
             * dtl.LogFileName = "./debug.txt";
             */
            Debug.WriteLine(firstResponse.body);
            ArrayList dataList = new ArrayList();

            // Parse the FORM elements.
            HtmlAgilityPack.HtmlNodeCollection nodecol = doc.DocumentNode.SelectNodes("//form");
            if (nodecol != null)
            {
                foreach (HtmlNode elements in nodecol)
                {
                    FormData formData = new FormData(elements.GetAttributeValue("name", ""), elements.GetAttributeValue("action", ""));
                    //System.Console.WriteLine("ACTION:" + elements.GetAttribute("action") + System.Environment.NewLine);
                    recSearchform(elements, ref formData);
                    dataList.Add(formData);
                }
            }

            formdatas = (FormData[])dataList.ToArray(typeof(FormData));
        }
示例#56
0
        /// <summary>
        /// Walks the node tree depth-first. A node whose name has an entry in <c>nodeTemplates</c>
        /// is parsed by that template; its children are then attached to the parsed object.
        /// Children of nodes without a template are attached to <paramref name="parent"/>.
        /// </summary>
        /// <param name="nodes">The sibling nodes to process.</param>
        /// <param name="parent">The object passed to the template and used as fallback parent.</param>
        private void walkinChild(HtmlAgilityPack.HtmlNodeCollection nodes, GameObject parent)
        {
            foreach (HtmlAgilityPack.HtmlNode current in nodes)
            {
                Debug.Log(current.Name);

                // Without a template the node creates nothing, so its children hang off the parent.
                GameObject owner = parent;
                if (nodeTemplates.ContainsKey(current.Name))
                {
                    owner = nodeTemplates[current.Name].parse(current, parent);
                }

                if (!current.HasChildNodes)
                {
                    continue;
                }

                walkinChild(current.ChildNodes, owner);
            }
        }
示例#57
0
        /// <summary>
        /// Loads the service page, stores its input and select elements in <c>Inputs</c> and
        /// <c>Selects</c>, and reports the outcome through the callbacks.
        /// </summary>
        /// <param name="success">Invoked when the page loaded and no service error was detected.</param>
        /// <param name="fail">Invoked with a message when the page reports an error or the request fails.</param>
        public void Init(Action success, Action <string> fail)
        {
            var serviceUrl = ServiceConfig.GetConfig(ServiceConfigType.DriveServiceGetUrl);

            Http.Get(serviceUrl)
                .OnSuccess(body =>
                {
                    Html.HtmlDocument page = new Html.HtmlDocument();
                    page.LoadHtml(body);

                    var root = page.DocumentNode;
                    Inputs  = root.SelectNodes("//input[@type='password' or @type='hidden' or @type='text']");
                    Selects = root.SelectNodes("//select");

                    if (ServiceError.TestServiceError(page.DocumentNode))
                    {
                        fail("服务器出错!");
                        return;
                    }

                    success();
                })
                .TimeOut(5000)
                .OnFail(new Action <WebException>((error) =>
                {
                    fail(error.ToString());
                }))
                .Go();
        }
        /// <summary>
        /// Downloads the page for <paramref name="word"/> and returns the translations found on it.
        /// </summary>
        /// <param name="word">Text appended to <c>_siteUrl</c> to form the request address.</param>
        /// <returns>
        /// The non-empty node texts (line feeds removed, trimmed) joined with ", ",
        /// or an empty string when nothing matched or the request failed.
        /// </returns>
        private async Task <string> GetResultAsync(string word)
        {
            string result  = "";
            var    htmlDoc = new HtmlDocument();

            try
            {
                string response;
                // Dispose the client so its connection resources are released after the call.
                using (var client = new HttpClient())
                {
                    response = await client.GetStringAsync(_siteUrl + word);
                }

                htmlDoc.LoadHtml(response);
                HtmlAgilityPack.HtmlNodeCollection translateList = htmlDoc.DocumentNode.SelectNodes(_xPathQuery);

                if (translateList != null && translateList.Count > 0)
                {
                    List <string> translateWordList = new List <string>();

                    foreach (var item in translateList)
                    {
                        // The pattern is a plain literal, so an ordinal string replace is enough.
                        var wordStr = item.InnerText.Replace("\n", "").Trim();

                        if (wordStr != String.Empty)
                        {
                            translateWordList.Add(wordStr);
                        }
                    }

                    result = string.Join(", ", translateWordList);
                }
            }
            catch (HttpRequestException)
            {
                // Request failed (e.g. 404): deliberately fall through and return the empty result.
            }

            return(result);
        }
        /// <summary>
        /// Loads one page of the doctors listing and builds a <c>Doctor</c> from each result item.
        /// </summary>
        /// <param name="pageNumber">Value of the <c>page</c> query parameter.</param>
        /// <returns>
        /// One <c>Doctor</c> per <c>div.result-item-content</c> element; an empty list when the page has none.
        /// </returns>
        public List <Doctor> saveDoctorsAndHospitals(int pageNumber)
        {
            List <Doctor> doctors = new List <Doctor>();

            HtmlWeb      web      = new HtmlWeb();
            HtmlDocument document = web.Load($"{siteUrl}/doctors?page={pageNumber}");

            // Every search result on the page.
            HtmlAgilityPack.HtmlNodeCollection resultItems = document.DocumentNode.SelectNodes("//div[@class=\"result-item-content\"]");
            if (resultItems != null)
            {
                foreach (HtmlAgilityPack.HtmlNode resultItem in resultItems)
                {
                    Doctor dr = new Doctor();

                    // The fields are read by fixed child positions, so this depends on the page's exact markup.
                    string fullName = resultItem.ChildNodes[1].InnerText;
                    dr.Fullname = Regex.Replace(fullName, @"\t|\n|\r", "");

                    HtmlNode tableNode = resultItem.ChildNodes[3];

                    // Specialty
                    HtmlNode specialtyNode = tableNode.ChildNodes[1].ChildNodes[3];
                    dr.Specialties = getSpecialties(specialtyNode);

                    // Hospitals
                    List <Hospital> hospitals = getHospitals(tableNode.ChildNodes[3].ChildNodes);
                    dr.Hospitals = new List <Hospital>(hospitals);

                    // HMO
                    dr.HMOs = getHMO(tableNode.ChildNodes[5].ChildNodes[3]);

                    doctors.Add(dr);
                }
            }

            return(doctors);
        }
示例#60
0
        /// <summary>
        /// Downloads and parses the page at <paramref name="url"/> and queries its
        /// <c>table[@data-test]</c> elements.
        /// </summary>
        /// <param name="url">Address passed to <c>GetHtmlText</c>.</param>
        /// <returns>
        /// Currently always an empty list: link extraction from the matched tables is not implemented.
        /// </returns>
        public List <string> getChildLinks(string url)
        {
            string htmlText = GetHtmlText(url);

            List <string> childLinks = new List <string>();

            Hap.HtmlDocument htmDoc = new Hap.HtmlDocument();
            htmDoc.LoadHtml(htmlText);

            Hap.HtmlNodeCollection targetElements = htmDoc.DocumentNode.SelectNodes("//table[@data-test]");

            // SelectNodes yields null when nothing matches; there is nothing to walk then.
            if (targetElements == null)
            {
                return(childLinks);
            }

            // NOTE(review): Links only collects empty placeholders and is never returned —
            // the per-table extraction still has to be written.
            List <string> Links = new List <string>();

            for (int i = 0; i < targetElements.Count; i++)
            {
                string link = "";
                Links.Add(link);
            }
            return(childLinks);
        }