/// <summary>
/// Downloads (if not already cached in <c>_page</c>/<c>_source</c>) and parses the
/// given page as JSON, selecting nodes via the template's XPath and channel filter.
/// </summary>
/// <param name="page">The page request to load and parse.</param>
/// <returns>The number of matching nodes, or 0 when parsing fails or nothing matches.</returns>
public int ParseUrl(HTTPRequest page)
{
  int count = 0;
  // Only re-download when the request differs from the last one processed.
  if (_page != page)
  {
    HTMLPage webPage = new HTMLPage(page);
    _source = webPage.GetPage();
    _page = new HTTPRequest(page);
  }
  JSONNode root = JSONNode.LoadJSON(_source);
  try
  {
    _nodeList = root.GetNodes(_data.XPath, _data.ChannelFilter);
  }
  catch (Exception) // ex)
  {
    //Log.Error("WebEPG: JSON failed");
    // NOTE(review): every exception is silently swallowed; caller only sees count == 0.
    return count;
  }
  if (_nodeList != null)
  {
    count = _nodeList.Count;
  }
  return count;
}
/// <summary>
/// Verifies that a URL passed to the HTTPRequest constructor is split
/// into its host and GET-query components.
/// </summary>
public void Base()
{
  HTTPRequest request = new HTTPRequest("http://www.somewhere.com/path/path/file.html");

  // Assert.AreEqual (instead of Assert.IsTrue on ==) reports the actual
  // value on failure, which IsTrue cannot do.
  Assert.AreEqual("www.somewhere.com", request.Host);
  Assert.AreEqual("/path/path/file.html", request.GetQuery);
}
/// <summary>
/// Initializes a new instance of the <see cref="RequestBuilder"/> class.
/// </summary>
/// <param name="baseRequest">The template request whose tags will be substituted.</param>
/// <param name="startTime">The listing start time used for date placeholder tags.</param>
/// <param name="data">Grabber configuration (channel id, offsets, formats).</param>
public RequestBuilder(HTTPRequest baseRequest, WorldDateTime startTime, RequestData data)
{
  _baseRequest = baseRequest;
  _requestTime = startTime;
  _data = data;
  // Both offsets start at zero; they are advanced externally as pages/days are walked.
  _dayOffset = 0;
  _offset = 0;
}
/// <summary>
/// Builds a concrete page request from the base request by substituting every
/// supported placeholder tag (date, channel, paging) with its current value.
/// </summary>
/// <returns>A new request with all tags replaced; the base request is not modified.</returns>
public HTTPRequest GetRequest()
{
  HTTPRequest request = new HTTPRequest(_baseRequest);
  // Culture for month/weekday names in the target site's language.
  CultureInfo culture = new CultureInfo(_data.SearchLang);
  if (_data.DayNames != null)
  {
    request.ReplaceTag("[DAY_NAME]", _data.DayNames[_dayOffset]);
  }
  if (_data.BaseDate != null)
  {
    DateTime basedt = DateTime.Parse(_data.BaseDate);
    request.ReplaceTag("[DAYS_SINCE]", _requestTime.DaysSince(basedt).ToString());
  }
  request.ReplaceTag("[ID]", _data.ChannelId);
  request.ReplaceTag("[DAY_OFFSET]", (_dayOffset + _data.OffsetStart).ToString());
  request.ReplaceTag("[EPOCH_TIME]", _requestTime.ToEpochTime().ToString());
  request.ReplaceTag("[EPOCH_DATE]", _requestTime.ToEpochDate().ToString());
  request.ReplaceTag("[DAYOFYEAR]", _requestTime.DateTime.DayOfYear.ToString());
  request.ReplaceTag("[YYYY]", _requestTime.Year.ToString());
  request.ReplaceTag("[MM]", String.Format("{0:00}", _requestTime.Month));   // zero-padded month
  request.ReplaceTag("[_M]", _requestTime.Month.ToString());                 // unpadded month
  request.ReplaceTag("[MONTH]", _requestTime.DateTime.ToString("MMMM", culture));
  request.ReplaceTag("[DD]", String.Format("{0:00}", _requestTime.Day));     // zero-padded day
  request.ReplaceTag("[_D]", _requestTime.Day.ToString());                   // unpadded day
  // this fix is needed for countries where the first day (0) is Sunday (not Monday)
  // those grabbers should include OffsetStart="1" in the Search tag.
  int dayNum = (int)_requestTime.DateTime.DayOfWeek + _data.OffsetStart;
  if (dayNum < 0)
  {
    dayNum += 7;
  }
  if (dayNum > 6)
  {
    dayNum = dayNum % 7;
  }
  request.ReplaceTag("[DAY_OF_WEEK]", dayNum.ToString());
  // check for script defined weekdayname and use it if found
  if (_data.WeekDayNames != null && dayNum < _data.WeekDayNames.Length && _data.WeekDayNames[dayNum] != String.Empty)
  {
    request.ReplaceTag("[WEEKDAY]", _data.WeekDayNames[dayNum]);
  }
  else
  {
    // Fall back to the culture-formatted weekday using the template's format string.
    request.ReplaceTag("[WEEKDAY]", _requestTime.DateTime.ToString(_data.WeekDay, culture));
  }
  request.ReplaceTag("[LIST_OFFSET]", ((_offset * _data.MaxListingCount) + _data.ListStart).ToString());
  request.ReplaceTag("[PAGE_OFFSET]", (_offset + _data.PageStart).ToString());
  return request;
}
/// <summary>
/// Copy constructor: initializes a new <see cref="HTTPRequest"/> with all
/// fields copied from the given request.
/// </summary>
/// <param name="request">The request to copy.</param>
public HTTPRequest(HTTPRequest request)
{
  _scheme = request._scheme;
  _host = request._host;
  _getQuery = request._getQuery;
  _postQuery = request._postQuery;
  _cookies = request._cookies;
  _externalBrowser = request._externalBrowser;
  _encoding = request._encoding;
  _delay = request._delay;
}
/// <summary>
/// Verifies that ReplaceTag substitutes placeholder tags in both the GET
/// query and the POST query, and that remaining tags survive untouched.
/// </summary>
public void ReplaceTag()
{
  HTTPRequest request = new HTTPRequest("http://www.somewhere.com/[PATH]/[FILE]");
  request.PostQuery = "[PATH]?[POSTDATA]";

  // Assert.AreEqual (instead of Assert.IsTrue on ==) reports the actual
  // value on failure, which IsTrue cannot do.
  request.ReplaceTag("[PATH]", "tagpath");
  Assert.AreEqual("/tagpath/[FILE]", request.GetQuery);
  Assert.AreEqual("tagpath?[POSTDATA]", request.PostQuery);

  request.ReplaceTag("[FILE]", "tagfile.html");
  Assert.AreEqual("/tagpath/tagfile.html", request.GetQuery);

  request.ReplaceTag("[POSTDATA]", "tagpostdata");
  Assert.AreEqual("tagpath?tagpostdata", request.PostQuery);
}
/// <summary>
/// Verifies HTTPRequest.Add for relative paths, absolute paths, absolute
/// URLs and bare query strings.
/// </summary>
public void Add()
{
  HTTPRequest requestBase = new HTTPRequest("http://www.somewhere.com/path/path/file.html");
  HTTPRequest request;

  // Assert.AreEqual (instead of Assert.IsTrue on ==) reports the actual
  // value on failure, which IsTrue cannot do.

  // Relative path: resolved against the parent directory.
  request = requestBase.Add("../relpath/relfile.html");
  Assert.AreEqual("http://www.somewhere.com/path/relpath/relfile.html", request.Url);

  // Absolute path: replaces the whole path.
  request = requestBase.Add("/newpath/newfile.html");
  Assert.AreEqual("http://www.somewhere.com/newpath/newfile.html", request.Url);

  // Absolute URL: replaces everything.
  request = requestBase.Add("http://www.somewhere_else.com/path/file.html");
  Assert.AreEqual("http://www.somewhere_else.com/path/file.html", request.Url);

  // Bare query string: keeps the path, replaces the query.
  requestBase = new HTTPRequest("http://spettacolo.alice.it/guidatv/cgi/index.cgi?tipo=3&channel=22");
  request = requestBase.Add("?tipo=1&qs=8353441");
  Assert.AreEqual("http://spettacolo.alice.it/guidatv/cgi/index.cgi?tipo=1&qs=8353441", request.Url);
}
/// <summary>
/// Downloads (if not already cached in <c>_page</c>/<c>_source</c>) and parses the
/// given page as XML, selecting nodes via the template's XPath — optionally
/// filtered by a channel attribute.
/// </summary>
/// <param name="page">The page request to load and parse.</param>
/// <returns>The number of matching nodes, or 0 when parsing fails or nothing matches.</returns>
public int ParseUrl(HTTPRequest page)
{
  int count = 0;
  // Only re-download when the request differs from the last one processed.
  if (_page != page)
  {
    HTMLPage webPage = new HTMLPage(page);
    _source = webPage.GetPage();
    _page = new HTTPRequest(page);
  }
  XmlDocument _xmlDoc = new XmlDocument();
  try
  {
    _xmlDoc.LoadXml(_source);
    if (_data.Channel != string.Empty)
    {
      // NOTE(review): _channelName is concatenated into the XPath verbatim —
      // a channel name containing a double quote breaks the expression.
      _nodeList = _xmlDoc.DocumentElement.SelectNodes(_data.XPath + "[@" + _data.Channel + "=\"" + _channelName + "\"]");
    }
    else
    {
      _nodeList = _xmlDoc.DocumentElement.SelectNodes(_data.XPath);
    }
  }
  catch (XmlException) // ex)
  {
    //Log.Error("WebEPG: XML failed");
    return count;
  }
  if (_nodeList != null)
  {
    count = _nodeList.Count;
  }
  return count;
}
/// <summary>
/// Parses the URL and returns the number of instances of the template found on this site.
/// </summary>
/// <param name="site">The site request to download and scan.</param>
/// <returns>The number of template matches found between the optional Start/End markers.</returns>
public int ParseUrl(HTTPRequest site)
{
  HTMLPage webPage = new HTMLPage(site);
  string pageSource = webPage.GetPage();

  // BUGFIX: the original only checked pageSource for null AFTER calling
  // pageSource.Length and IndexOf on it, so a failed download threw a
  // NullReferenceException before the check was ever reached.
  if (pageSource == null)
  {
    return 0;
  }

  // Restrict scanning to the region between the template's optional
  // Start/End markers (case-insensitive); fall back to the full page
  // when a marker is missing or not found.
  int startIndex = 0;
  if (_template.Start != null && _template.Start != string.Empty)
  {
    startIndex = pageSource.IndexOf(_template.Start, 0, StringComparison.OrdinalIgnoreCase);
    if (startIndex == -1)
    {
      startIndex = 0; // log error?
    }
  }

  int endIndex = pageSource.Length;
  if (_template.End != null && _template.End != string.Empty)
  {
    endIndex = pageSource.IndexOf(_template.End, startIndex, StringComparison.OrdinalIgnoreCase);
    if (endIndex == -1)
    {
      endIndex = pageSource.Length; // log error?
    }
  }

  // run the replacement tasks before parsing the page source
  return _profiler.MatchCount(Replacements(pageSource.Substring(startIndex, endIndex - startIndex)));
}
/// <summary>
/// Verifies the overloaded == and != operators of HTTPRequest, including the
/// contribution of PostQuery to equality and comparisons against null.
/// Assert.IsTrue/IsFalse are used deliberately (not AreEqual) so that the
/// operators under test are the ones actually invoked.
/// </summary>
public void EqualOperator()
{
  HTTPRequest request1 = new HTTPRequest("http://www.somewhere.com/1/1");
  HTTPRequest request2 = new HTTPRequest("http://www.somewhere.com/2/2");
  HTTPRequest request3 = new HTTPRequest("http://www.somewhere.com/1/1");
  // Same URL => equal; different URL => not equal.
  Assert.IsTrue(request1 == request3);
  Assert.IsFalse(request1 == request2);
  Assert.IsTrue(request1 != request2);
  // PostQuery must also participate in equality.
  request1.PostQuery = "post1";
  request2.PostQuery = "post2";
  request3.PostQuery = "post1";
  Assert.IsTrue(request1 == request3);
  Assert.IsFalse(request1 == request2);
  Assert.IsTrue(request1 != request2);
  // Null comparisons must not throw and must behave as expected.
  HTTPRequest request = null;
  Assert.IsTrue(request == null);
  Assert.IsTrue(request1 != null);
}
/// <summary>
/// Initializes a new instance of the <see cref="HTMLPage"/> class and
/// immediately loads the page, using the encoding carried by the request.
/// </summary>
/// <param name="page">The page request.</param>
public HTMLPage(HTTPRequest page) : this()
{
  _encoding = page.Encoding;
  LoadPage(page);
}
/// <summary>
/// Initializes a new instance of the <see cref="HTMLPage"/> class and
/// immediately loads the page, forcing the given character encoding.
/// </summary>
/// <param name="page">The page request.</param>
/// <param name="encoding">The encoding to use, overriding the request's own.</param>
public HTMLPage(HTTPRequest page, string encoding) : this()
{
  _encoding = encoding;
  LoadPage(page);
}
/// <summary>
/// Performs the HTTP transaction for the given request.
/// Thin public wrapper around <see cref="Transaction"/>.
/// </summary>
/// <param name="request">The request.</param>
/// <returns>true on success, false on failure (see GetError for details).</returns>
public bool HTTPGet(HTTPRequest request)
{
  return Transaction(request);
}
/// <summary>
/// Initializes a new instance of the <see cref="HTTPTransaction"/> class and
/// performs the HTTP request immediately.
/// Note: the Transaction result is discarded here; callers must check GetError/GetData.
/// </summary>
/// <param name="request">The request.</param>
public HTTPTransaction(HTTPRequest request)
{
  Transaction(request);
}
/// <summary>
/// Performs HTTP transactions for the specified page request: GET or POST,
/// manual single-hop redirect handling, cookie propagation, and download of
/// the response body into <c>_data</c>.
/// </summary>
/// <param name="pageRequest">The page request.</param>
/// <returns>bool - Success/fail (on failure <c>_error</c> holds the message)</returns>
private bool Transaction(HTTPRequest pageRequest)
{
  ArrayList Blocks = new ArrayList();
  byte[] Block;
  byte[] readBlock;
  int size;
  int totalSize;
  DateTime startTime = DateTime.Now;
  // Optional politeness delay before hitting the server.
  if (pageRequest.Delay > 0)
  {
    Thread.Sleep(pageRequest.Delay);
  }
  string agent = string.IsNullOrEmpty(pageRequest.UserAgent)? _agent : pageRequest.UserAgent;
  Uri pageUri = pageRequest.Uri;
  try
  {
    // Make the Webrequest
    // Create the request header
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);
    try
    {
      // Use the current user in case an NTLM Proxy or similar is used.
      // request.Proxy = WebProxy.GetDefaultProxy();
      request.Proxy.Credentials = CredentialCache.DefaultCredentials;
    }
    catch (Exception) {}
    request.UserAgent = agent;
    // Redirects are handled manually below so cookies/referer can be forwarded.
    request.AllowAutoRedirect = false;
    if (pageRequest.Cookies != string.Empty)
    {
      // Parse "name=value;name=value" pairs into the shared cookie collection.
      string[] cookiesArray = pageRequest.Cookies.Split(new Char[] {';'});
      foreach (string cookie in cookiesArray)
      {
        string[] cookieParts = cookie.Split(new Char[] {'='});
        if (cookieParts.Length >= 2)
        {
          if (_cookies == null)
            _cookies = new CookieCollection();
          _cookies.Add(new Cookie(cookieParts[0], cookieParts[1], "/", request.RequestUri.Host));
        }
      }
    }
    if (pageRequest.PostQuery == string.Empty)
    {
      // GET request
      if (_auth != null)
      {
        request.Credentials = _auth.Get(pageUri.Host);
      }
      request.CookieContainer = new CookieContainer();
      if (_cookies != null)
      {
        request.CookieContainer.Add(_cookies);
      }
    }
    else
    {
      // POST request
      request.ContentType = _postType;
      // NOTE(review): this is the CHARACTER count, not the encoded byte count —
      // wrong Content-Length if the post body contains non-ASCII characters.
      request.ContentLength = pageRequest.PostQuery.Length;
      request.Method = "POST";
      request.CookieContainer = new CookieContainer();
      if (_cookies != null)
        request.CookieContainer.Add(_cookies);
      // Write post message
      try
      {
        // NOTE(review): neither stream is closed/disposed here; only Flush is called.
        Stream OutputStream = request.GetRequestStream();
        StreamWriter WriteStream = new StreamWriter(OutputStream);
        WriteStream.Write(pageRequest.PostQuery);
        WriteStream.Flush();
      }
      catch (WebException ex)
      {
        _error = ex.Message;
        return false;
      }
    }
    _response = (HttpWebResponse)request.GetResponse();
    // Check for redirection (one hop only; a second redirect is not followed)
    if ((_response.StatusCode == HttpStatusCode.Found) ||
        (_response.StatusCode == HttpStatusCode.Redirect) ||
        (_response.StatusCode == HttpStatusCode.Moved) ||
        (_response.StatusCode == HttpStatusCode.MovedPermanently))
    {
      Uri uri = new Uri(pageUri, _response.Headers["Location"]);
      HttpWebRequest redirect = (HttpWebRequest)WebRequest.Create(uri);
      try
      {
        // Use the current user in case an NTLM Proxy or similar is used.
        // request.Proxy = WebProxy.GetDefaultProxy();
        redirect.Proxy.Credentials = CredentialCache.DefaultCredentials;
      }
      catch (Exception) {}
      redirect.UserAgent = agent;
      redirect.AllowAutoRedirect = false;
      redirect.Referer = _response.ResponseUri.ToString();
      redirect.CookieContainer = new CookieContainer();
      // Forward the first Set-Cookie pair from the redirect response, if any.
      if (_response.Headers["Set-Cookie"] != null)
      {
        string cookieStr = _response.Headers["Set-Cookie"];
        Regex cookieParser = new Regex("(?<name>[^=]+)=(?<value>[^;]+)(;)");
        Match result = cookieParser.Match(cookieStr);
        if (result.Success)
        {
          Cookie reply = new Cookie(result.Groups["name"].ToString(), result.Groups["value"].ToString());
          reply.Domain = uri.Host;
          redirect.CookieContainer.Add(reply);
        }
      }
      //redirect.ContentType = "text/html";
      _response = (HttpWebResponse)redirect.GetResponse();
    }
    // Remember response cookies for subsequent requests.
    if (request.CookieContainer != null)
    {
      _response.Cookies = request.CookieContainer.GetCookies(request.RequestUri);
      _cookies = _response.Cookies;
    }
    // Read the body in fixed-size blocks, then assemble into one buffer.
    Stream ReceiveStream = _response.GetResponseStream();
    Block = new byte[blockSize];
    totalSize = 0;
    while ((size = ReceiveStream.Read(Block, 0, blockSize)) > 0)
    {
      readBlock = new byte[size];
      Array.Copy(Block, readBlock, size);
      Blocks.Add(readBlock);
      totalSize += size;
    }
    ReceiveStream.Close();
    _response.Close();
    int pos = 0;
    _data = new byte[totalSize];
    for (int i = 0; i < Blocks.Count; i++)
    {
      Block = (byte[])Blocks[i];
      Block.CopyTo(_data, pos);
      pos += Block.Length;
    }
  }
  catch (WebException ex)
  {
    _error = ex.Message;
    return false;
  }
  // Collect site statistics
  if (_stats != null)
  {
    DateTime endTime = DateTime.Now;
    TimeSpan duration = endTime - startTime;
    _stats.Add(pageUri.Host, 1, _data.Length, duration);
  }
  return true;
}
/// <summary>
/// Loads a page from cache.
/// </summary>
/// <param name="page">The page request identifying the cache entry.</param>
/// <returns>bool - true if caching is enabled and the page is in the cache</returns>
public bool LoadPage(HTTPRequest page)
{
  if (_cacheMode == Mode.Enabled)
  {
    if (LoadCacheFile(GetCacheFileName(page)))
    {
      return true;
    }
  }
  return false;
}
/// <summary>
/// Gets the page using external com browser IE.
/// NOTE(review): in this build the external path simply delegates to the
/// internal .NET implementation.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if successful</returns>
private bool GetExternal(HTTPRequest page)
{
  return(GetInternal(page));
}
/// <summary>
/// Initializes a new instance of the <see cref="WebParser"/> class.
/// </summary>
/// <param name="webTemplate">The web template.</param>
public WebParser(WebParserTemplate webTemplate)
{
  // Store the template
  _template = webTemplate;

  // Get default template -> currently only a default is supported
  // In the future template per channel ID can be added.
  HtmlParserTemplate listingTemplate = _template.GetTemplate("default");
  _listingPreference = _template.GetPreference("default");

  // Create dictionary for month strings
  Dictionary<string, int> monthsDict = null;
  if (_template.months != null)
  {
    // template contains months list -> load into dictionary (month name -> 1-based number)
    monthsDict = new Dictionary<string, int>();
    for (int i = 0; i < _template.months.Length; i++)
    {
      monthsDict.Add(_template.months[i], i + 1);
    }
  }

  // create new Html Parser using default template
  _listingParser = new HtmlParser(listingTemplate, typeof (ProgramData), monthsDict);

  // setup sublink parser if template and config exists
  _sublinkParser = null;
  if (_template.sublinks != null && _template.sublinks.Count > 0)
  {
    // Load first sublink template -> only one supported now
    // future support for multiple sublinks possible
    SublinkInfo sublink = _template.sublinks[0];
    HtmlParserTemplate sublinkTemplate = _template.GetTemplate(sublink.template);
    // BUGFIX: the original resolved _sublinkPreference from sublinkTemplate.Name
    // BEFORE the null check, throwing NullReferenceException when the named
    // template does not exist. Only touch sublinkTemplate once it is known non-null.
    if (sublinkTemplate != null)
    {
      _sublinkPreference = _template.GetPreference(sublinkTemplate.Name);
      // create sublink parser using template
      _sublinkParser = new HtmlParser(sublinkTemplate, typeof (ProgramData));
      _sublinkMatch = sublink.search;
      _sublinkRequest = sublink.url;
    }
  }
}
/// <summary>
/// Parses a site for a given URL.
/// Also lazily initializes the sublink request from the site request the
/// first time through, when a sublink parser exists but no explicit sublink
/// URL was configured.
/// </summary>
/// <param name="site">The request for the site to be parsed</param>
/// <returns>number of instances of the template found</returns>
public int ParseUrl(HTTPRequest site)
{
  if (_sublinkParser != null && _sublinkRequest == null)
  {
    _sublinkRequest = new HTTPRequest(site);
    _sublinkRequest.Delay = 0; // do not use delay for sublinks unless explicitly requested
  }
  return _listingParser.ParseUrl(site);
}
/// <summary>
/// Deletes a cached page, if present.
/// </summary>
/// <param name="page">The page request identifying the cache entry.</param>
public void DeleteCachePage(HTTPRequest page)
{
  string file = GetCacheFileName(page);
  if (File.Exists(file))
  {
    File.Delete(file);
  }
}
/// <summary>
/// Builds the cache file name for a page request. The name is derived from
/// the host plus a hash of the URI, and — for POST requests — an additional
/// hash of the post body, so GET and POST to the same URL cache separately.
/// </summary>
/// <param name="Page">The page request.</param>
/// <returns>Path of the cache file inside CACHE_DIR.</returns>
private static string GetCacheFileName(HTTPRequest Page)
{
  uint urlHash = (uint)Page.Uri.GetHashCode();
  if (string.IsNullOrEmpty(Page.PostQuery))
  {
    // Plain GET: host + URL hash is enough to identify the entry.
    return string.Format("{0}/{1}_{2}.html", CACHE_DIR, Page.Host, urlHash);
  }
  // POST: include a hash of the post body as well.
  uint bodyHash = (uint)Page.PostQuery.GetHashCode();
  return string.Format("{0}/{1}_{2}_{3}.html", CACHE_DIR, Page.Host, urlHash, bodyHash);
}
/// <summary>
/// Saves a page to the cache (unless caching is disabled).
/// </summary>
/// <param name="page">The page request identifying the cache entry.</param>
/// <param name="strSource">The HTML source.</param>
public void SavePage(HTTPRequest page, string strSource)
{
  if (_cacheMode != Mode.Disabled)
  {
    SaveCacheFile(GetCacheFileName(page), strSource);
  }
}
/// <summary>
/// Gets the page using internal .NET networking, then decodes the body:
/// a forced encoding wins; otherwise a "charset" declaration is sniffed from
/// the HTML head; otherwise the default encoding is used.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if successful; on failure _error holds the message</returns>
private bool GetInternal(HTTPRequest page)
{
  // Use internal code to get HTML page
  HTTPTransaction Page = new HTTPTransaction();
  Encoding encode;
  string strEncode = _defaultEncode;
  if (Page.HTTPGet(page))
  {
    byte[] pageData = Page.GetData();
    int i;
    if (_encoding != "")
    {
      // Caller forced an encoding; skip sniffing.
      strEncode = _encoding;
      _pageEncodingMessage = "Forced: " + _encoding;
    }
    else
    {
      // First decode with the default encoding so the head can be searched.
      encode = System.Text.Encoding.GetEncoding(_defaultEncode);
      _strPageSource = encode.GetString(pageData);
      int headEnd;
      if ((headEnd = _strPageSource.ToLower().IndexOf("</head")) != -1)
      {
        if ((i = _strPageSource.ToLower().IndexOf("charset", 0, headEnd)) != -1)
        {
          // Copy characters after "charset=" up to the closing double quote.
          // NOTE(review): assumes the attribute is double-quoted; a single-quoted
          // or unquoted charset value will over-read.
          strEncode = "";
          i += 8;
          for (; i < _strPageSource.Length && _strPageSource[i] != '\"'; i++)
          {
            strEncode += _strPageSource[i];
          }
          _encoding = strEncode;
        }
        if (strEncode == "")
        {
          strEncode = _defaultEncode;
          _pageEncodingMessage = "Default: " + _defaultEncode;
        }
        else
        {
          _pageEncodingMessage = strEncode;
        }
      }
    }
    Log.Debug("HTMLPage: GetInternal encoding: {0}", _pageEncodingMessage);
    // Encoding: depends on selected page — re-decode only when needed.
    if (string.IsNullOrEmpty(_strPageSource) || strEncode.ToLower() != _defaultEncode)
    {
      try
      {
        encode = System.Text.Encoding.GetEncoding(strEncode);
        _strPageSource = encode.GetString(pageData);
      }
      catch (System.ArgumentException e)
      {
        // Unknown encoding name — keep the default-decoded source.
        Log.Write(e);
      }
    }
    return(true);
  }
  _error = Page.GetError();
  if (!string.IsNullOrEmpty(_error))
  {
    Log.Error("HTMLPage: GetInternal error: {0}", _error);
  }
  return(false);
}
/// <summary>
/// Initializes a new instance of the <see cref="XmlParser"/> class.
/// </summary>
/// <param name="data">The XML parser template (XPath, channel filter, ...).</param>
public XmlParser(XmlParserTemplate data)
{
  // No page cached yet; first ParseUrl call will download.
  _page = null;
  _data = data;
  _dataType = typeof (ProgramData);
}
/// <summary>
/// Combines this request's URI with a relative path, an absolute path, an
/// absolute URL, or a bare query string, producing a new request that keeps
/// this request's encoding.
/// </summary>
/// <param name="relativeUri">The URI fragment to resolve against this request.</param>
/// <returns>A new <see cref="HTTPRequest"/> for the combined URI.</returns>
public HTTPRequest Add(string relativeUri)
{
  // A bare query string replaces only the query, so prepend the current path.
  string target = relativeUri.StartsWith("?")
    ? Uri.LocalPath + relativeUri
    : relativeUri;
  HTTPRequest combined = new HTTPRequest(new Uri(Uri, target));
  combined._encoding = _encoding;
  return combined;
}
/// <summary>
/// Loads the page: serves it from cache when available, otherwise fetches it
/// (externally or internally, per the request), storing a fresh copy in the
/// cache on success.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if the page source was obtained</returns>
public bool LoadPage(HTTPRequest page)
{
  bool cacheReady = _cache != null && _cache.Initialised;

  // Cache hit: no network traffic needed.
  if (cacheReady && _cache.LoadPage(page))
  {
    _strPageSource = _cache.GetPage();
    return true;
  }

  // Delay before getting page
  if (page.Delay > 0)
  {
    Thread.Sleep(page.Delay);
  }

  bool success = page.External ? GetExternal(page) : GetInternal(page);
  if (!success)
  {
    return false;
  }

  // Store the freshly fetched source for next time.
  if (cacheReady)
  {
    _cache.SavePage(page, _strPageSource);
  }
  return true;
}
/// <summary>
/// Gets the hyper link: searches match <paramref name="index"/> for an anchor
/// or onclick attribute, extracts its URL (or JavaScript sublink parameters),
/// and combines it into <paramref name="linkURL"/>.
/// </summary>
/// <param name="index">The index.</param>
/// <param name="match">The match.</param>
/// <param name="linkURL">The link URL; updated in place with the resolved link.</param>
/// <returns>bool - success/fail</returns>
public bool GetHyperLink(int index, string match, ref HTTPRequest linkURL)
{
  string regex = "<(a |[^>]*onclick)[^>]*" + match + "[^>]*>"; //"<a .*? href=[^>]*" .ToLowerInvariant()
  string result = SearchRegex(index, regex, true, false);
  if (result == null)
  {
    return(false);
  }
  bool linkFound = false;
  string strLinkURL = string.Empty;
  int start = -1;
  char delim = '>';
  // NOTE(review): start begins at -1 and the attribute offset is ADDED to it,
  // so start lands one char before the value; the +1 below compensates.
  // If both href= and onclick= are present, both offsets accumulate — the
  // resulting index is then meaningless.
  if (result.ToLowerInvariant().IndexOf("href=") != -1)
  {
    start += result.ToLowerInvariant().IndexOf("href=") + 5;
  }
  if (result.ToLowerInvariant().IndexOf("onclick=") != -1)
  {
    start += result.ToLowerInvariant().IndexOf("onclick=") + 8;
  }
  // If the attribute value is quoted, use the quote char as the end delimiter.
  if (result[start + 1] == '\"' || result[start + 1] == '\'')
  {
    start++;
    delim = result[start];
  }
  int end = -1;
  //if (delim != '>')
  //{
  //  start = -1;
  //  start = result.IndexOf(delim);
  //}
  if (start != -1)
  {
    end = result.IndexOf(delim, ++start);
  }
  if (end != -1)
  {
    strLinkURL = result.Substring(start, end - start);
    linkFound = true;
  }
  // If the extracted text still contains '=', pull out the quoted value after it.
  if ((start = strLinkURL.IndexOf("=")) != -1)
  {
    for (int i = 0; i < strLinkURL.Length - start; i++)
    {
      if (strLinkURL[start + i] == '\"' || strLinkURL[start + i] == '\'')
      {
        delim = strLinkURL[start + i];
        start = start + i;
        break;
      }
    }
    end = -1;
    if (start != -1)
    {
      end = strLinkURL.IndexOf(delim, ++start);
    }
    if (end != -1)
    {
      strLinkURL = strLinkURL.Substring(start, end - start);
    }
  }
  // JavaScript sublinks carry their arguments as [1], [2], ... tag substitutions.
  string[] param = GetJavaSubLinkParams(result); //strLinkURL);
  if (param != null)
  {
    if (!linkURL.HasTag("[1]"))
    {
      linkURL = linkURL.Add(HtmlString.ToAscii(param[0]));
    }
    else
    {
      for (int i = 0; i < param.Length; i++)
      {
        linkURL.ReplaceTag("[" + (i + 1).ToString() + "]", HtmlString.ToAscii(param[i]));
      }
    }
  }
  else
  {
    linkURL = linkURL.Add(HtmlString.ToAscii(strLinkURL.Trim()));
  }
  //}
  return(linkFound);
}
/// <summary>
/// Downloads the site and counts how many template rows its source contains.
/// </summary>
/// <param name="site">The site request to download.</param>
/// <returns>The row count reported by the row parser.</returns>
public int ParseUrl(HTTPRequest site)
{
  string pageSource = new HTMLPage(site).GetPage();
  return _rows.RowCount(pageSource);
}
/// <summary>
/// Gets the page using external com browser IE.
/// NOTE(review): in this build the external path simply delegates to the
/// internal .NET implementation.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if successful</returns>
private bool GetExternal(HTTPRequest page)
{
  return GetInternal(page);
}
/// <summary>
/// Gets the parsed program data at the given match index, merging in any
/// search-field results and attaching a sublink request when a matching
/// sublink is found (the sublink itself is parsed later, on demand).
/// </summary>
/// <param name="index">The match index within the listing parser.</param>
/// <returns>The merged program data, or null when nothing parsed at this index.</returns>
public IParserData GetData(int index)
{
  // Perform any search and remove requests
  // Searches can search over the whole data
  // optionally removing text so it will not be parsed with the main data
  ProgramData searchData = null;
  if (_template.searchList != null && _template.searchList.Count > 0)
  {
    searchData = new ProgramData();
    for (int i = 0; i < _template.searchList.Count; i++)
    {
      WebSearchData search = _template.searchList[i];
      string result = _listingParser.SearchRegex(index, search.Match, search.Remove);
      if (result != null)
      {
        searchData.SetElement(search.Field, result);
      }
    }
  }
  // Get the parsed data at index
  ProgramData data = ((ProgramData)_listingParser.GetData(index));
  if (data != null)
  {
    // Set the data preference -> important for merging data (eg data from sublink page)
    data.Preference = new DataPreference(_listingPreference);
    // If search data exists merge.
    if (searchData != null)
    {
      data.Merge(searchData);
    }
    // If there is a sublink parser, check for a matching sublink
    // sublink is not parsed here, because that may not be required
    // the URL for the sublink will be built and stored for future use see GetLinkedData()
    if (_sublinkParser != null)
    {
      HTTPRequest sublinkRequest = new HTTPRequest(_sublinkRequest);
      // Minimum delay disabled because of a bug it wasn't being used anyway (always 0) and
      // possibly not needed to start using now.
      // Enabling has serious impact on grabspeed.
      // if (sublinkRequest.Delay < 500)
      // {
      //   sublinkRequest.Delay = 500;
      // }
      if (_listingParser.GetHyperLink(index, _sublinkMatch, ref sublinkRequest))
      {
        data.SublinkRequest = sublinkRequest;
      }
    }
  }
  return data;
}
/// <summary>
/// Gets the page using internal .NET networking (transaction disposed via
/// using), then decodes the body: a forced encoding wins; otherwise a
/// "charset" declaration is sniffed from the HTML head; otherwise the default
/// encoding is used.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if successful; on failure _error holds the message</returns>
private bool GetInternal(HTTPRequest page)
{
  // Use internal code to get HTML page
  using (HTTPTransaction Page = new HTTPTransaction())
  {
    Encoding encode;
    string strEncode = _defaultEncode;
    if (Page.HTTPGet(page))
    {
      byte[] pageData = Page.GetData();
      int i;
      if (_encoding != "")
      {
        // Caller forced an encoding; skip sniffing.
        strEncode = _encoding;
        _pageEncodingMessage = "Forced: " + _encoding;
      }
      else
      {
        // First decode with the default encoding so the head can be searched.
        encode = System.Text.Encoding.GetEncoding(_defaultEncode);
        _strPageSource = encode.GetString(pageData);
        int headEnd;
        if ((headEnd = _strPageSource.ToLower().IndexOf("</head")) != -1)
        {
          if ((i = _strPageSource.ToLower().IndexOf("charset", 0, headEnd)) != -1)
          {
            // Copy characters after "charset=" up to the closing double quote.
            // NOTE(review): assumes the attribute is double-quoted; a single-quoted
            // or unquoted charset value will over-read.
            strEncode = "";
            i += 8;
            for (; i < _strPageSource.Length && _strPageSource[i] != '\"'; i++)
            {
              strEncode += _strPageSource[i];
            }
            _encoding = strEncode;
          }
          if (strEncode == "")
          {
            strEncode = _defaultEncode;
            _pageEncodingMessage = "Default: " + _defaultEncode;
          }
          else
          {
            _pageEncodingMessage = strEncode;
          }
        }
      }
      GlobalServiceProvider.Get<ILog>().Debug("HTMLPage: GetInternal encoding: {0}", _pageEncodingMessage);
      // Encoding: depends on selected page — re-decode only when needed.
      if (string.IsNullOrEmpty(_strPageSource) || strEncode.ToLower() != _defaultEncode)
      {
        try
        {
          encode = System.Text.Encoding.GetEncoding(strEncode);
          _strPageSource = encode.GetString(pageData);
        }
        catch (System.ArgumentException e)
        {
          // Unknown encoding name — keep the default-decoded source.
          GlobalServiceProvider.Get<ILog>().Error(e);
        }
      }
      return true;
    }
    _error = Page.GetError();
    if (!string.IsNullOrEmpty(_error))
      GlobalServiceProvider.Get<ILog>().Error("HTMLPage: GetInternal error: {0}", _error);
    return false;
  }
}
/// <summary>
/// Gets the hyper link: searches match <paramref name="index"/> for an anchor
/// or onclick attribute, extracts its URL (or JavaScript sublink parameters),
/// and combines it into <paramref name="linkURL"/>.
/// </summary>
/// <param name="index">The index.</param>
/// <param name="match">The match.</param>
/// <param name="linkURL">The link URL; updated in place with the resolved link.</param>
/// <returns>bool - success/fail</returns>
public bool GetHyperLink(int index, string match, ref HTTPRequest linkURL)
{
  string regex = "<(a |[^>]*onclick)[^>]*" + match + "[^>]*>"; //"<a .*? href=[^>]*" .ToLowerInvariant()
  string result = SearchRegex(index, regex, true, false);
  if (result == null)
  {
    return false;
  }
  bool linkFound = false;
  string strLinkURL = string.Empty;
  int start = -1;
  char delim = '>';
  // NOTE(review): start begins at -1 and the attribute offset is ADDED to it,
  // so start lands one char before the value; the +1 below compensates.
  // If both href= and onclick= are present, both offsets accumulate — the
  // resulting index is then meaningless.
  if (result.ToLowerInvariant().IndexOf("href=") != -1)
  {
    start += result.ToLowerInvariant().IndexOf("href=") + 5;
  }
  if (result.ToLowerInvariant().IndexOf("onclick=") != -1)
  {
    start += result.ToLowerInvariant().IndexOf("onclick=") + 8;
  }
  // If the attribute value is quoted, use the quote char as the end delimiter.
  if (result[start + 1] == '\"' || result[start + 1] == '\'')
  {
    start++;
    delim = result[start];
  }
  int end = -1;
  //if (delim != '>')
  //{
  //  start = -1;
  //  start = result.IndexOf(delim);
  //}
  if (start != -1)
  {
    end = result.IndexOf(delim, ++start);
  }
  if (end != -1)
  {
    strLinkURL = result.Substring(start, end - start);
    linkFound = true;
  }
  // If the extracted text still contains '=', pull out the quoted value after it.
  if ((start = strLinkURL.IndexOf("=")) != -1)
  {
    for (int i = 0; i < strLinkURL.Length - start; i++)
    {
      if (strLinkURL[start + i] == '\"' || strLinkURL[start + i] == '\'')
      {
        delim = strLinkURL[start + i];
        start = start + i;
        break;
      }
    }
    end = -1;
    if (start != -1)
    {
      end = strLinkURL.IndexOf(delim, ++start);
    }
    if (end != -1)
    {
      strLinkURL = strLinkURL.Substring(start, end - start);
    }
  }
  // JavaScript sublinks carry their arguments as [1], [2], ... tag substitutions.
  string[] param = GetJavaSubLinkParams(result); //strLinkURL);
  if (param != null)
  {
    if (!linkURL.HasTag("[1]"))
    {
      linkURL = linkURL.Add(HtmlString.ToAscii(param[0]));
    }
    else
    {
      for (int i = 0; i < param.Length; i++)
      {
        linkURL.ReplaceTag("[" + (i + 1).ToString() + "]", HtmlString.ToAscii(param[i]));
      }
    }
  }
  else
  {
    linkURL = linkURL.Add(HtmlString.ToAscii(strLinkURL.Trim()));
  }
  //}
  return linkFound;
}
/// <summary>
/// Initializes a new instance of the <see cref="HTMLPage"/> class and
/// immediately loads the page, using the encoding carried by the request.
/// </summary>
/// <param name="page">The page request.</param>
public HTMLPage(HTTPRequest page) : this()
{
  _encoding = page.Encoding;
  LoadPage(page);
}
/// <summary>
/// Initializes a new instance of the <see cref="HTMLPage"/> class and
/// immediately loads the page, forcing the given character encoding.
/// </summary>
/// <param name="page">The page request.</param>
/// <param name="encoding">The encoding to use, overriding the request's own.</param>
public HTMLPage(HTTPRequest page, string encoding) : this()
{
  _encoding = encoding;
  LoadPage(page);
}
/// <summary>
/// Gets the page using external com browser IE.
/// </summary>
/// <param name="page">The page request.</param>
/// <returns>true if successful</returns>
private bool GetExternal(HTTPRequest page)
{
  // Use External Browser (IE) to get HTML page
  // IE downloads all linked graphics ads, etc
  // IE will run Javascript source if required to render the page
  if (_IE == null)
  {
    _IE = new InternetExplorer();
  }
  IWebBrowser2 webBrowser = (IWebBrowser2)_IE;
  object empty = Missing.Value;
  // check if request is POST or GET
  if (page.PostQuery != null)
  {
    ASCIIEncoding encoding = new ASCIIEncoding();
    object postData = (object)encoding.GetBytes(page.PostQuery);
    object header = (object)"Content-Type: application/x-www-form-urlencoded\n\r";
    webBrowser.Navigate(page.Url, ref empty, ref empty, ref postData, ref header);
  }
  else
  {
    webBrowser.Navigate(page.Url, ref empty, ref empty, ref empty, ref empty);
  }
  // NOTE(review): unbounded busy-wait — blocks forever if IE never finishes loading.
  while (webBrowser.Busy == true)
  {
    Thread.Sleep(500);
  }
  // Note: only the body's innerHTML is captured, not the full document source.
  HTMLDocumentClass doc = (HTMLDocumentClass)webBrowser.Document;
  _strPageSource = doc.body.innerHTML;
  return true;
}