/// <summary>
/// Downloads the IMDb title page for the given id so that details such as
/// title and year can be extracted from it to assist other data providers.
/// </summary>
/// <remarks>
/// This may have to be replaced by, or linked to, the IMDb data provider
/// so that the logic is not duplicated.
/// </remarks>
/// <param name="ImdbId">Identifier appended to "http://www.imdb.com/title/" to build the page URL.</param>
/// <returns>The HTML-decoded page contents, or null when GetResponse() reports failure.</returns>
private static string getImdbDetailsPage(string ImdbId) {
    string detailsUrl = "http://www.imdb.com/title/" + ImdbId;
    WebGrabber pageGrabber = new WebGrabber(detailsUrl);

    // Without a response there is nothing to decode
    if (!pageGrabber.GetResponse()) {
        return null;
    }

    string rawPage = pageGrabber.GetString();
    return HttpUtility.HtmlDecode(rawPage);
}
/// <summary>
/// Queries the Windows Meta Services webservice for the movie metadata
/// that belongs to the given DiscID.
/// </summary>
/// <param name="DiscID">Disc identifier appended to the webservice query URL.</param>
/// <returns>
/// The nodes returned by GetXML("METADATA"), or null when GetResponse() reports failure.
/// </returns>
private static XmlNodeList GetMovieMetaData(string DiscID) {
    string queryUrl = urlWindowsMetaServicesQueryDiscId + DiscID;

    WebGrabber metaGrabber = new WebGrabber(queryUrl);
    metaGrabber.Encoding = Encoding.UTF8;

    // Bail out early when the service did not answer
    if (!metaGrabber.GetResponse()) {
        return null;
    }

    return metaGrabber.GetXML("METADATA");
}
// Attempts to determine the source ("from") language when none was given.
// Sends the input text to the Google AJAX language detection service, extracts
// the language code from the response and stores the result in _fromLanguage.
// If anything other than a thread abort goes wrong, _fromLanguage falls back to "en".
private void DetectLanguage(string input) {
    try {
        // Captures the value that follows the "language" key in the response
        Regex reg = new Regex(@"language"".""(?<languageCode>[^""]+)");

        // NOTE(review): input is inserted into the query string as-is; presumably the
        // caller has already URL-encoded it - confirm.
        string url = String.Format("http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}", input);

        WebGrabber webGrabber = new WebGrabber(url);
        webGrabber.GetResponse();
        string result = webGrabber.GetString();

        Match match = reg.Match(result);
        string matched = match.Groups["languageCode"].Value;
        _fromLanguage = LanguageUtility.GetLanguage(matched);

        if (_debug)
            logger.Debug("Translation tool: Detect Language: Source: {0} : Detected Language: {1}", result, LanguageUtility.ToString(_fromLanguage));
    }
    catch (ThreadAbortException) {
        // Let thread aborts propagate; "throw;" keeps the original stack trace,
        // whereas "throw e;" would reset it.
        throw;
    }
    catch (Exception) {
        // Any other failure: fall back to English as the source language
        _fromLanguage = LanguageUtility.GetLanguage("en");
    }
}
// Requests a translation of the given string from the Google AJAX translate
// service (using the configured language pair) and returns the translated text
// with the surrounding JSON stripped by ResponseClean.
private string GetTranslation(string toTrans) {
    string requestUrl = String.Format("http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair={0}&q={1}", _strLanguagePare, toTrans);

    WebGrabber grabber = new WebGrabber(requestUrl);
    grabber.GetResponse();
    string response = grabber.GetString();

    if (_debug) {
        logger.Debug("Translation tool: Uncleaned Translation: {0}", response);
    }

    // An empty response contains no JSON to strip, so hand it back untouched
    if (response.Length == 0) {
        return response;
    }

    // Remove all JSON from the response and return only the translated string
    return ResponseClean(response);
}
// Retrieves the contents of the URL configured for this node.
// The URL and user agent are first run through parseString with the given variables.
// When caching is enabled and the parsed URL is already in Context.Cache, the cached
// contents are returned without issuing a request. Otherwise the page is downloaded,
// the cookie header is stored in the variables under a per-host session key, and the
// contents are cached (when caching is enabled).
// Returns the page contents, or an empty string when the page could not be retrieved.
private string RetrieveUrl(Dictionary<string, string> variables) {
    string parsedUrl = parseString(variables, url);
    string parsedUserAgent = parseString(variables, userAgent);
    string pageContents = string.Empty;

    if (_useCaching && Context.Cache.ContainsKey(parsedUrl)) {
        logger.Debug("Using Cached Version of URL: {0}", parsedUrl);
        return Context.Cache[parsedUrl];
    }

    if (Context.DebugMode)
        logger.Debug("Retrieving URL: {0}", parsedUrl);

    // Try to grab the document
    try {
        WebGrabber grabber = new WebGrabber(parsedUrl);
        grabber.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        grabber.UserAgent = parsedUserAgent;
        grabber.Encoding = encoding;
        grabber.Timeout = timeout;
        grabber.TimeoutIncrement = timeoutIncrement;
        grabber.MaxRetries = maxRetries;
        grabber.AllowUnsafeHeader = allowUnsafeHeader;
        grabber.CookieHeader = cookies;
        grabber.AcceptLanguage = acceptLanguage;
        grabber.Debug = Context.DebugMode;

        // Keep session / chaining: cookies stored by an earlier request to the
        // same host are added to the cookie header of this request
        string sessionKey = "urn://scraper/header/" + grabber.Request.RequestUri.Host;
        string sessionCookies;
        if (variables.TryGetValue(sessionKey, out sessionCookies)) {
            if (grabber.CookieHeader == null)
                grabber.CookieHeader = sessionCookies;
            else
                grabber.CookieHeader = grabber.CookieHeader + "," + sessionCookies;
        }

        // Retrieve the document
        if (grabber.GetResponse()) {
            // save cookie session data for future requests
            setVariable(variables, sessionKey, grabber.CookieHeader);

            // grab the request results and store in our cache for later retrievals
            pageContents = grabber.GetString();
            if (_useCaching)
                Context.Cache[parsedUrl] = pageContents;
        }
    }
    catch (ThreadAbortException) {
        // Let thread aborts propagate; "throw;" keeps the original stack trace,
        // whereas "throw e;" would reset it.
        throw;
    }
    catch (Exception e) {
        // Best effort: log the failure and return whatever has been collected so far
        logger.Warn("Could not connect to " + parsedUrl + ". " + e.Message);
    }

    return pageContents;
}