/// <summary>
/// Attempts to determine the source language of <paramref name="input"/> when none was given,
/// and stores the result in <c>_fromLanguage</c>.
/// </summary>
/// <remarks>
/// The text is sent to the language detect service and the language code is extracted from the
/// response with a regular expression. If the request fails or anything throws along the way,
/// the source language falls back to "en". Thread aborts are never swallowed.
/// </remarks>
/// <param name="input">Text whose language should be detected.</param>
private void DetectLanguage(string input) {
    try {
        Regex reg = new Regex(@"language"".""(?<languageCode>[^""]+)");

        // NOTE(review): input is placed into the query string as-is; confirm the caller URL-encodes it.
        string url = String.Format("http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}", input);
        WebGrabber webGrabber = new WebGrabber(url);

        // A failed request has no usable body, so go straight to the same fallback
        // the exception path uses instead of reading from a missing response.
        if (!webGrabber.GetResponse()) {
            _fromLanguage = LanguageUtility.GetLanguage("en");
            return;
        }

        string result = webGrabber.GetString();
        Match match = reg.Match(result);
        string matched = match.Groups["languageCode"].Value;
        _fromLanguage = LanguageUtility.GetLanguage(matched);

        if (_debug) {
            logger.Debug("Translation tool: Detect Language: Source: {0} : Detected Language: {1}", result, LanguageUtility.ToString(_fromLanguage));
        }
    }
    catch (ThreadAbortException) {
        // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
        throw;
    }
    catch (Exception) {
        // Detection is best effort: default to English on any other failure.
        _fromLanguage = LanguageUtility.GetLanguage("en");
    }
}
/// <summary>
/// Requests the given URL, asking for a JSON response, and returns the response body as a string.
/// </summary>
/// <remarks>
/// <c>OnDataSend</c> is raised before the request and <c>OnDataReceived</c> after a successful
/// response, when they have subscribers. Note that <paramref name="postData"/> is only handed to
/// <c>OnDataSend</c>; this method does not attach it to the request itself.
/// </remarks>
/// <param name="url">Address to request.</param>
/// <param name="postData">Optional payload reported to <c>OnDataSend</c>.</param>
/// <returns>The response text, or <c>null</c> when no response could be obtained.</returns>
private static string GetJson(string url, string postData = null) {
    if (OnDataSend != null) {
        OnDataSend(url, postData);
    }

    WebGrabber request = WebUtils.GetWebGrabberInstance(url);
    request.Encoding = Encoding.UTF8;
    request.Accept = "application/json";

    // Bail out early when the request did not succeed.
    if (!request.GetResponse()) {
        return null;
    }

    string json = request.GetString();
    if (OnDataReceived != null) {
        OnDataReceived(json);
    }

    return json;
}
/// <summary>
/// Downloads the m.imdb.com title page for the given IMDb id so that details such as
/// title and year can be prefetched to assist other data providers.
/// </summary>
/// <remarks>
/// This may need to be replaced by, or linked to, the IMDb data provider to avoid redundant logic.
/// </remarks>
/// <param name="ImdbId">IMDb title id that is appended to the title URL.</param>
/// <returns>The HTML-decoded page text, or <c>null</c> when no response could be obtained.</returns>
private static string getImdbDetailsPage(string ImdbId) {
    string detailsUrl = "http://m.imdb.com/title/" + ImdbId;
    WebGrabber page = new WebGrabber(detailsUrl);

    return page.GetResponse()
        ? HttpUtility.HtmlDecode(page.GetString())
        : null;
}
/// <summary>
/// Requests the given URL and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>The response text, or <c>null</c> when no response could be obtained.</returns>
private static string getJson(string url) {
    WebGrabber request = Utility.GetWebGrabberInstance(url);
    request.Encoding = Encoding.UTF8;

    // Nothing to read when the request failed.
    if (!request.GetResponse()) {
        return null;
    }

    return request.GetString();
}
/// <summary>
/// Sends a string to the translate service for the configured language pair and returns
/// the translated text.
/// </summary>
/// <param name="toTrans">Text to translate.</param>
/// <returns>
/// The translation with the surrounding JSON removed, or an empty string when the request
/// fails or the response has no content.
/// </returns>
private string GetTranslation(string toTrans) {
    // NOTE(review): toTrans is placed into the query string as-is; confirm the caller URL-encodes it.
    string url = String.Format("http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair={0}&q={1}", _strLanguagePare, toTrans);
    WebGrabber webGrabber = new WebGrabber(url);

    // Do not read from the grabber when the request itself failed.
    if (!webGrabber.GetResponse()) {
        return String.Empty;
    }

    string translatedString = webGrabber.GetString();
    if (_debug) {
        logger.Debug("Translation tool: Uncleaned Translation: {0}", translatedString);
    }

    // Guard against a missing body as well as an empty one before cleaning.
    if (String.IsNullOrEmpty(translatedString)) {
        return String.Empty;
    }

    // Remove all JSON from the response and return only the translated string.
    return ResponseClean(translatedString);
}
/// <summary>
/// Retrieves the document behind the <c>url</c> template after substituting
/// <paramref name="variables"/> into it.
/// </summary>
/// <remarks>
/// When caching is enabled, a previously stored page for the parsed URL is returned without a
/// new request, and freshly retrieved pages are stored in the cache. Cookie data is carried
/// between requests to the same host through a per-host entry in <paramref name="variables"/>.
/// Connection failures are logged as warnings and not propagated; thread aborts are rethrown.
/// </remarks>
/// <param name="variables">Variables used to parse the URL and user agent; also holds session cookie data.</param>
/// <returns>The page contents, or an empty string when the document could not be retrieved.</returns>
private string RetrieveUrl(Dictionary<string, string> variables) {
    string parsedUrl = parseString(variables, url);
    string parsedUserAgent = parseString(variables, userAgent);
    string pageContents = string.Empty;

    if (_useCaching && Context.Cache.ContainsKey(parsedUrl)) {
        logger.Debug("Using Cached Version of URL: {0}", parsedUrl);
        return Context.Cache[parsedUrl];
    }

    if (Context.DebugMode) {
        logger.Debug("Retrieving URL: {0}", parsedUrl);
    }

    // Try to grab the document
    try {
        WebGrabber grabber = new WebGrabber(parsedUrl);
        grabber.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        grabber.UserAgent = parsedUserAgent;
        grabber.Encoding = encoding;
        grabber.Timeout = timeout;
        grabber.TimeoutIncrement = timeoutIncrement;
        grabber.MaxRetries = maxRetries;
        grabber.AllowUnsafeHeader = allowUnsafeHeader;
        grabber.CookieHeader = cookies;
        grabber.AcceptLanguage = acceptLanguage;
        grabber.Debug = Context.DebugMode;

        // Keep session / chaining: append cookie data stored for this host by earlier requests
        string sessionKey = "urn://scraper/header/" + grabber.Request.RequestUri.Host;
        string sessionCookies;
        if (variables.TryGetValue(sessionKey, out sessionCookies)) {
            if (grabber.CookieHeader == null) {
                grabber.CookieHeader = sessionCookies;
            }
            else {
                grabber.CookieHeader = grabber.CookieHeader + "," + sessionCookies;
            }
        }

        // Retrieve the document
        if (grabber.GetResponse()) {
            // Save cookie session data for future requests
            setVariable(variables, sessionKey, grabber.CookieHeader);

            // Grab the request results and store them in our cache for later retrievals
            pageContents = grabber.GetString();
            if (_useCaching) {
                Context.Cache[parsedUrl] = pageContents;
            }
        }
    }
    catch (ThreadAbortException) {
        // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
        throw;
    }
    catch (Exception e) {
        logger.Warn("Could not connect to " + parsedUrl + ". " + e.Message);
    }

    return pageContents;
}