/// <summary>
/// Checks whether the downloaded page contains the proxy list table
/// (a <c>table</c> element whose <c>id</c> is <c>proxylisttable</c>) and
/// stores the outcome in <c>PageIsValid</c>.
/// </summary>
/// <param name="content">Raw HTML of the page to validate.</param>
protected override void ValidatePage(string content)
{
    try
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(content);

        var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "id", "proxylisttable");
        PageIsValid = target != null;
    }
    catch (Exception)
    {
        // Validation stays best-effort (no exception escapes), but a page
        // that cannot even be parsed must not keep a stale "valid" flag
        // from an earlier call.
        PageIsValid = false;
    }
}
/// <summary>
/// Reads the proxy table (<c>table</c> with <c>id="listtable"</c>) from the
/// given page, converts each data row with <c>ParseRowToProxy</c>, and adds
/// every proxy that passes the configured filters and is not already known
/// (same <c>HostIP</c> and <c>PortNo</c>) to <c>FreeProxies</c>, raising the
/// "proxy fetched" event for each newly added entry.
/// </summary>
/// <param name="hmaPgDoc">Raw HTML of the proxy list page.</param>
/// <returns>
/// The number of <c>tr</c> elements found in the table (the skipped first
/// row included), or 0 when the table is not present.
/// </returns>
private int ExtractProxies(string hmaPgDoc)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(hmaPgDoc);

    var tblNode = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "id", "listtable");
    if (tblNode == null)
    {
        return 0;
    }

    // Materialize once; the original re-copied the array on every iteration.
    var rows = tblNode.Descendants("tr").ToArray();

    // Index 0 is skipped, exactly as before (first row is not parsed as a proxy).
    for (var ctr = 1; ctr < rows.Length; ctr++)
    {
        FreeProxy parsed = null;
        if (!ParseRowToProxy(rows[ctr], ref parsed) || parsed == null)
        {
            // Previously such a row fell through with all filter flags still
            // true, which either dereferenced null in the duplicate lookup
            // or added a null entry to FreeProxies.
            continue;
        }

        var candidate = parsed;
        if (!MatchesConfiguredFilters(candidate))
        {
            continue;
        }

        // The duplicate check and the insert must happen under the same lock,
        // otherwise two callers can both decide the proxy is new.
        var added = false;
        lock (FreeProxies)
        {
            var alreadyKnown = FreeProxies.Any(p => p.HostIP == candidate.HostIP && p.PortNo == candidate.PortNo);
            if (!alreadyKnown)
            {
                FreeProxies.Add(candidate);
                added = true;
            }
        }

        // Raise the event outside the lock so handlers cannot block other writers.
        if (added)
        {
            InvokeEventFreeIPProxyFetched(new EventHandlers.FreeIPProxyFetchedEventArgs(candidate));
        }
    }

    return rows.Length;
}

/// <summary>
/// Applies the protocol, anonymity, speed and connection-time filters to a parsed proxy.
/// </summary>
/// <param name="proxy">A fully parsed proxy; must not be null.</param>
/// <returns><c>true</c> when the proxy satisfies every filter that is set.</returns>
private bool MatchesConfiguredFilters(FreeProxy proxy)
{
    // Protocol: a non-empty option list takes precedence over the single value.
    if (_protocolOptions != null && _protocolOptions.Any())
    {
        // The original tested "x != proxy.Protocol", which accepted a proxy
        // whenever ANY option differed from it - the opposite of the
        // single-Protocol branch below. Accept only listed protocols.
        if (!_protocolOptions.Any(x => x == proxy.Protocol))
        {
            return false;
        }
    }
    else if (Protocol.HasValue && Protocol.Value != proxy.Protocol)
    {
        return false;
    }

    if (AnonymityLevel.HasValue)
    {
        var wanted = AnonymityLevel.Value;
        var actual = proxy.AnonymityLevel;
        var anonymityOk = true;

        if (wanted == ProxyAnonymityLevelEnum.High__KA
            || wanted == ProxyAnonymityLevelEnum.PlanetLab
            || wanted == ProxyAnonymityLevelEnum.High)
        {
            // These levels require an exact match.
            anonymityOk = actual == wanted;
        }
        else if (wanted == ProxyAnonymityLevelEnum.Medium)
        {
            anonymityOk = actual == ProxyAnonymityLevelEnum.High
                || actual == ProxyAnonymityLevelEnum.Medium;
        }
        else if (wanted == ProxyAnonymityLevelEnum.Low)
        {
            anonymityOk = actual == ProxyAnonymityLevelEnum.High
                || actual == ProxyAnonymityLevelEnum.Medium
                || actual == ProxyAnonymityLevelEnum.Low;
        }
        else if (wanted == ProxyAnonymityLevelEnum.None)
        {
            anonymityOk = actual == ProxyAnonymityLevelEnum.High
                || actual == ProxyAnonymityLevelEnum.Medium
                || actual == ProxyAnonymityLevelEnum.Low
                || actual == ProxyAnonymityLevelEnum.None;
        }

        if (!anonymityOk)
        {
            return false;
        }
    }

    // Speed: the requested value is a minimum; any other request accepts everything.
    if (Speed.HasValue)
    {
        if (Speed.Value == ProxySpeedEnum.Fast && proxy.Speed != ProxySpeedEnum.Fast)
        {
            return false;
        }

        if (Speed.Value == ProxySpeedEnum.Medium
            && proxy.Speed != ProxySpeedEnum.Medium
            && proxy.Speed != ProxySpeedEnum.Fast)
        {
            return false;
        }
    }

    // Connection time: same "at least this good" rule as speed.
    if (ConnectionTime.HasValue)
    {
        if (ConnectionTime.Value == ProxyConnectionSpeedEnum.Fast
            && proxy.ConnectionTime != ProxyConnectionSpeedEnum.Fast)
        {
            return false;
        }

        if (ConnectionTime.Value == ProxyConnectionSpeedEnum.Medium
            && proxy.ConnectionTime != ProxyConnectionSpeedEnum.Medium
            && proxy.ConnectionTime != ProxyConnectionSpeedEnum.Fast)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Converts one table row into a <c>FreeProxy</c>. Cells are read by
/// position: 1 = host, 2 = port, 3 = country, 4 = speed bar,
/// 5 = connection-time bar, 6 = protocol, 7 = anonymity level.
/// </summary>
/// <param name="rowNode">The <c>tr</c> node to parse.</param>
/// <param name="freeProxy">
/// Receives the parsed proxy on success; left untouched when the method
/// returns <c>false</c> or throws.
/// </param>
/// <returns>
/// <c>true</c> when the row was parsed; <c>false</c> when the row has no
/// <c>td</c> cells. A row that has cells but does not match the expected
/// layout still throws (index, format or enum-parse errors propagate).
/// </returns>
private bool ParseRowToProxy(HtmlNode rowNode, ref FreeProxy freeProxy)
{
    // Materialize once instead of calling ToArray() for every cell access.
    var cells = rowNode.Descendants("td").ToArray();
    if (cells.Length == 0)
    {
        return false;
    }

    // Build into a local so the caller never sees a half-populated proxy.
    var proxy = new FreeProxy();
    proxy.HostIP = SnatchIPAddress(cells[1].InnerHtml);
    proxy.PortNo = int.Parse(cells[2].InnerText);

    // Turn the displayed country into the enum member name. ToUpperInvariant
    // keeps the mapping stable under cultures with special casing rules
    // (e.g. Turkish dotted/dotless i), which would break Enum.Parse.
    var country = cells[3].InnerText.Trim().ToUpperInvariant()
        .Replace("; ", "___")
        .Replace(", ", "__")
        .Replace(" ", "_")
        .Replace("'", "____");
    proxy.Country = (ProxyCountry)Enum.Parse(typeof(ProxyCountry), country);

    proxy.ResponseRate = ReadSpeedBarPercent(cells[5], "speedbar connection_time");
    if (proxy.ResponseRate <= 35)
    {
        proxy.ConnectionTime = ProxyConnectionSpeedEnum.Slow;
    }
    else if (proxy.ResponseRate <= 65)
    {
        proxy.ConnectionTime = ProxyConnectionSpeedEnum.Medium;
    }
    else
    {
        // Was Medium (copy-paste of the branch above), which made the
        // "Fast" filter impossible to satisfy.
        proxy.ConnectionTime = ProxyConnectionSpeedEnum.Fast;
    }

    proxy.SpeedRate = ReadSpeedBarPercent(cells[4], "speedbar response_time");
    if (proxy.SpeedRate <= 35)
    {
        proxy.Speed = ProxySpeedEnum.Slow;
    }
    else if (proxy.SpeedRate <= 65)
    {
        proxy.Speed = ProxySpeedEnum.Medium;
    }
    else
    {
        // Same copy-paste fix as for ConnectionTime.
        proxy.Speed = ProxySpeedEnum.Fast;
    }

    var protocol = cells[6].InnerText.Replace("/", "_");
    proxy.Protocol = (ProxyProtocolEnum)Enum.Parse(typeof(ProxyProtocolEnum), protocol);

    var anonymity = cells[7].InnerText.Replace(" +", "__");
    proxy.AnonymityLevel = (ProxyAnonymityLevelEnum)Enum.Parse(typeof(ProxyAnonymityLevelEnum), anonymity);

    proxy.Valid = true;
    freeProxy = proxy;
    return true;
}

/// <summary>
/// Reads the percentage from the <c>style</c> attribute ("width:NN%") of the
/// first inner <c>div</c> of the bar with the given CSS class inside a cell.
/// </summary>
/// <param name="cell">The table cell containing the bar.</param>
/// <param name="barClass">Exact value of the bar's <c>class</c> attribute.</param>
/// <returns>The integer parsed from the width value.</returns>
private static int ReadSpeedBarPercent(HtmlNode cell, string barClass)
{
    var bar = HtmlUtil.GetNodeByAttribute(cell, "div", "class", barClass);
    var fill = bar.Descendants("div").First();
    var width = fill.Attributes["style"].Value.Replace("width:", "").Replace("%", "");
    return int.Parse(width);
}
/// <summary>
/// Parses the proxy table (<c>table</c> with <c>id="proxylist"</c>) and
/// returns the proxies found, reporting each one through
/// <paramref name="freeProxyFetched"/> as it is parsed.
/// </summary>
/// <param name="pageContentDocument">Raw HTML of the proxy list page.</param>
/// <param name="freeProxyFetched">
/// Callback invoked once per parsed proxy; may be <c>null</c>.
/// </param>
/// <returns>
/// The queue of parsed proxies, or <c>null</c> when the table is missing or
/// contains no rows at all.
/// </returns>
public override Queue<FreeProxy> LoadUpIPProxies(string pageContentDocument, Action<FreeProxy> freeProxyFetched)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(pageContentDocument);

    var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "id", "proxylist");
    if (target == null)
    {
        return null;
    }

    var lines = HtmlUtil.GetNodeCollection(target, "tr");
    if (lines == null)
    {
        return null;
    }

    Queue<FreeProxy> freeProxies = null;

    // Iterate over a snapshot: script nodes are removed from the DOM below.
    foreach (var row in lines.ToList())
    {
        // The queue is created as soon as any row exists, so "rows but no
        // proxies" yields an empty queue while "no rows" yields null.
        if (freeProxies == null)
        {
            freeProxies = new Queue<FreeProxy>();
        }

        var cells = row.Descendants("td").ToArray();

        // Cells 0..3 are read below; shorter rows (header, spacer) are skipped
        // instead of failing with an index error.
        if (cells.Length < 4)
        {
            continue;
        }

        // Drop an embedded script so it does not pollute the address text.
        var script = cells[0].Descendants("script").FirstOrDefault();
        if (script != null)
        {
            script.Remove();
        }

        var proxy = new FreeProxy();

        // Cell 0 holds "ip:port".
        var addressParts = cells[0].InnerText.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
        proxy.IPAddress = addressParts[0];
        proxy.PortNo = int.Parse(addressParts[1]);

        // Cell 1: anonymity level, stored as the raw cell text.
        proxy.AnonymityLevel = cells[1].InnerText;

        // TODO: cell 2 holds the last-checked time; parse it into
        // proxy.LastValidationCheck once the format is settled.

        // Cell 3: country. Only the part before the first comma is used.
        // ToLowerInvariant keeps the lookup key stable across cultures.
        var countryPartial = cells[3].InnerText.ToLowerInvariant().Replace(" ", "_");
        countryPartial = countryPartial.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)[0];
        proxy.Country = FindProxyCountryFromPartial(countryPartial);

        freeProxies.Enqueue(proxy);

        if (freeProxyFetched != null)
        {
            freeProxyFetched(proxy);
        }
    }

    return freeProxies;
}
/// <summary>
/// Parses the proxy table (<c>table</c> with <c>class="proxytbl"</c>) and
/// returns the proxies found, reporting each one through
/// <paramref name="freeProxyFetched"/> as it is parsed.
/// </summary>
/// <param name="pageContentDocument">Raw HTML of the proxy list page.</param>
/// <param name="freeProxyFetched">
/// Callback invoked once per parsed proxy; may be <c>null</c>.
/// </param>
/// <returns>
/// The queue of parsed proxies, or <c>null</c> when the table is missing or
/// contains no rows at all.
/// </returns>
public override Queue<FreeProxy> LoadUpIPProxies(string pageContentDocument, Action<FreeProxy> freeProxyFetched)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(pageContentDocument);

    var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
    if (target == null)
    {
        return null;
    }

    var lines = HtmlUtil.GetNodeCollection(target, "tr");
    if (lines == null)
    {
        return null;
    }

    Queue<FreeProxy> freeProxies = null;

    // Iterate over a snapshot: script nodes are removed from the DOM below.
    foreach (var row in lines.ToList())
    {
        if (freeProxies == null)
        {
            freeProxies = new Queue<FreeProxy>();
        }

        var cells = row.Descendants("td").ToArray();
        if (cells.Length <= 1)
        {
            continue;
        }

        // The port cell can carry an inline script; remove it before reading the text.
        var script = cells[1].Descendants("script").FirstOrDefault();
        if (script != null)
        {
            script.Remove();
        }

        // The original read the cells into locals and then enqueued an empty
        // FreeProxy, discarding everything. Store what was parsed; a row
        // without a numeric port cannot describe a usable proxy and is skipped.
        int portNo;
        if (!int.TryParse(HtmlUtil.Resolve(cells[1].InnerText), out portNo))
        {
            continue;
        }

        var proxy = new FreeProxy();
        proxy.IPAddress = cells[0].InnerText.Trim();
        proxy.PortNo = portNo;

        // TODO: remaining columns are not mapped yet -
        // cell 2 = country, cell 3 = anonymity, cell 4 = protocol (from the
        // cell's class attribute), cell 5 = last-checked time.

        freeProxies.Enqueue(proxy);

        if (freeProxyFetched != null)
        {
            freeProxyFetched(proxy);
        }
    }

    return freeProxies;
}
/// <summary>
/// Parses the proxy table (<c>table</c> with <c>class="proxytbl"</c>) and
/// registers one <c>IPProxy</c> per data row via <c>RegisterProxy</c>, then
/// hands the page to the base implementation.
/// </summary>
/// <remarks>
/// When the table is not found the method returns immediately and the base
/// implementation is not called.
/// </remarks>
/// <param name="content">Raw HTML of the proxy list page.</param>
protected override void ParseProxyPage(string content)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(content);

    var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
    if (target == null)
    {
        return;
    }

    var providerId = GetType().Name.Replace("Cartridge", "");

    // Read the clock once. Building the date from three separate
    // DateTime.Now reads can mix components from two different days when
    // the call straddles midnight.
    var now = DateTime.Now;
    var today = new DateTime(now.Year, now.Month, now.Day);

    // Iterate over a snapshot: script nodes are removed from the DOM below.
    foreach (var row in HtmlUtil.GetNodeCollection(target, "tr").ToList())
    {
        var cells = row.Descendants("td").ToArray();

        // Cells 0..5 are read below; shorter rows (header, spacer) are skipped
        // instead of failing with an index error.
        if (cells.Length < 6)
        {
            continue;
        }

        var proxy = new IPProxy()
        {
            ProviderId = providerId,
        };

        // The port cell can carry an inline script; remove it before reading the text.
        var script = cells[1].Descendants("script").FirstOrDefault();
        if (script != null)
        {
            script.Remove();
        }

        // Cell 0: IP address.
        proxy.IPAddress = cells[0].InnerText.Trim();

        // Cell 1: port.
        proxy.PortNo = int.Parse(HtmlUtil.Resolve(cells[1].InnerText.Trim()).Replace("\r\n", "").Trim());

        // Cell 2: country; only the part before the first comma is used.
        var country = HtmlUtil.Resolve(cells[2].InnerText.Trim());
        var countryParts = country.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        proxy.Country = Helper.FindProxyCountryFromPartial(countryParts[0].Replace(" ", "_"));

        // Cell 3: anonymity. Anything not containing "anonymous" is treated as elite.
        proxy.AnonymityLevel = cells[3].InnerText.Replace("\r\n", "").Contains("anonymous")
            ? ProxyAnonymityLevelsEnum.Anonymous
            : ProxyAnonymityLevelsEnum.Elite;

        // Cell 4: protocol, encoded in the cell's class attribute. A missing
        // attribute now means "not https" instead of a NullReferenceException.
        var protocolClass = HtmlUtil.Resolve(cells[4].GetAttributeValue("class", string.Empty));
        proxy.Protocol = protocolClass.IndexOf("https", StringComparison.OrdinalIgnoreCase) >= 0
            ? ProxyProtocolsEnum.HTTPS
            : ProxyProtocolsEnum.HTTP;

        // Cell 5: time of day of the last check, applied to today's date.
        proxy.LastChecked = today.Add(TimeSpan.Parse(HtmlUtil.Resolve(cells[5].InnerText.Trim())));

        RegisterProxy(proxy);
    }

    base.ParseProxyPage(content);
}