Exemple #1
0
        /// <summary>
        /// Attempts to determine the source ("from") language of <paramref name="input"/> when
        /// none was given, and stores the result in <c>_fromLanguage</c>.
        /// </summary>
        /// <remarks>
        /// Sends the text to the Google AJAX language-detect endpoint and pulls the language code
        /// out of the response with a regular expression. Any failure other than a thread abort
        /// falls back to the language returned by <c>LanguageUtility.GetLanguage("en")</c>.
        /// </remarks>
        /// <param name="input">Text placed in the <c>q</c> query parameter of the detect request.</param>
        private void DetectLanguage(string input)
        {
            try {
                // Matches: language"  <any single char>  "  then captures everything up to the next quote.
                Regex      reg        = new Regex(@"language"".""(?<languageCode>[^""]+)");
                // NOTE(review): input is inserted into the query string as-is; confirm whether the
                // caller or WebGrabber URL-encodes it.
                string     url        = String.Format("http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}", input);
                WebGrabber webGrabber = new WebGrabber(url);
                webGrabber.GetResponse();
                string result  = webGrabber.GetString();
                Match  match   = reg.Match(result);
                string matched = match.Groups["languageCode"].Value;
                _fromLanguage = LanguageUtility.GetLanguage(matched);
                if (_debug)
                {
                    logger.Debug("Translation tool: Detect Language: Source: {0} : Detected Language: {1}", result, LanguageUtility.ToString(_fromLanguage));
                }
            }
            catch (ThreadAbortException) {
                // Let thread aborts propagate; "throw;" keeps the original stack trace
                // (the previous "throw e;" reset it).
                throw;
            }
            catch (Exception) {
                // Best effort: every other failure falls back to the "en" language.
                _fromLanguage = LanguageUtility.GetLanguage("en");
            }
        }
Exemple #2
0
        /// <summary>
        /// Requests <paramref name="url"/> and returns the response body as a string.
        /// </summary>
        /// <remarks>
        /// <c>OnDataSend</c> (when set) is invoked before the request and <c>OnDataReceived</c>
        /// (when set) is invoked with the response text. The grabber's Encoding is set to UTF-8
        /// and its Accept value to "application/json".
        /// </remarks>
        /// <param name="url">Address to request.</param>
        /// <param name="postData">Passed to <c>OnDataSend</c> only; not otherwise used by this method.</param>
        /// <returns>The response text, or <c>null</c> when <c>GetResponse()</c> returns false.</returns>
        private static string GetJson(string url, string postData = null)
        {
            // Notify listeners about the outgoing request.
            if (OnDataSend != null)
            {
                OnDataSend(url, postData);
            }

            WebGrabber request = WebUtils.GetWebGrabberInstance(url);
            request.Encoding = Encoding.UTF8;
            request.Accept = "application/json";

            // Nothing to hand back when the request did not succeed.
            if (!request.GetResponse())
            {
                return null;
            }

            string json = request.GetString();

            // Notify listeners about the received payload.
            if (OnDataReceived != null)
            {
                OnDataReceived(json);
            }

            return json;
        }
Exemple #3
0
        /// <summary>
        /// Downloads the mobile IMDb title page for the given id and returns its HTML-decoded
        /// content, so details such as title and year can assist other data providers.
        /// </summary>
        /// <remarks>
        /// This may need to be replaced by, or linked to, the IMDb data provider to avoid
        /// redundant logic.
        /// </remarks>
        /// <param name="ImdbId">Identifier appended to "http://m.imdb.com/title/" to build the page address.</param>
        /// <returns>The HTML-decoded page text, or <c>null</c> when <c>GetResponse()</c> returns false.</returns>
        private static string getImdbDetailsPage(string ImdbId)
        {
            string address = "http://m.imdb.com/title/" + ImdbId;
            WebGrabber page = new WebGrabber(address);

            if (!page.GetResponse())
            {
                return null;
            }

            string rawHtml = page.GetString();
            return HttpUtility.HtmlDecode(rawHtml);
        }
Exemple #4
0
        /// <summary>
        /// Requests <paramref name="url"/> with the grabber's Encoding set to UTF-8 and returns
        /// the response text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The response text, or <c>null</c> when <c>GetResponse()</c> returns false.</returns>
        private static string getJson(string url)
        {
            WebGrabber request = Utility.GetWebGrabberInstance(url);
            request.Encoding = Encoding.UTF8;

            // Only read the body when the request succeeded.
            return request.GetResponse() ? request.GetString() : null;
        }
Exemple #5
0
        /// <summary>
        /// Sends <paramref name="toTrans"/> to the Google AJAX translate endpoint for the
        /// configured language pair and returns the cleaned translation.
        /// </summary>
        /// <param name="toTrans">Text placed in the <c>q</c> query parameter of the translate request.</param>
        /// <returns>
        /// The response after <c>ResponseClean</c> has processed it, or an empty string when the
        /// request fails or yields no content.
        /// </returns>
        private string GetTranslation(string toTrans)
        {
            // NOTE(review): toTrans is inserted into the query string as-is; confirm whether the
            // caller or WebGrabber URL-encodes it.
            string url = String.Format("http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair={0}&q={1}", _strLanguagePare, toTrans);

            WebGrabber webGrabber = new WebGrabber(url);

            // Only read the body when the request succeeded; a failed request is treated as "no content".
            string translatedString = webGrabber.GetResponse() ? webGrabber.GetString() : null;

            if (_debug)
            {
                logger.Debug("Translation tool: Uncleaned Translation: {0}", translatedString);
            }

            // Guard against a null as well as an empty response before cleaning.
            if (String.IsNullOrEmpty(translatedString))
            {
                return String.Empty;
            }

            // Remove all JSON from the response and return only the translated string.
            return ResponseClean(translatedString);
        }
        /// <summary>
        /// Retrieves the document at this node's URL after substituting <paramref name="variables"/>
        /// into the URL and user-agent templates.
        /// </summary>
        /// <remarks>
        /// When caching is enabled and the parsed URL is already in <c>Context.Cache</c>, the cached
        /// contents are returned without issuing a request. A cookie value stored in
        /// <paramref name="variables"/> under "urn://scraper/header/" + host is appended to the
        /// request's cookie header, and the cookie header after a successful request is written
        /// back under the same key.
        /// </remarks>
        /// <param name="variables">Variables used for template parsing and for session cookie storage.</param>
        /// <returns>
        /// The page contents, or an empty string when the request does not succeed or an exception
        /// (other than <see cref="ThreadAbortException"/>, which is rethrown) occurs.
        /// </returns>
        private string RetrieveUrl(Dictionary <string, string> variables)
        {
            string parsedUrl       = parseString(variables, url);
            string parsedUserAgent = parseString(variables, userAgent);
            string pageContents    = string.Empty;

            if (_useCaching && Context.Cache.ContainsKey(parsedUrl))
            {
                logger.Debug("Using Cached Version of URL: {0}", parsedUrl);
                return(Context.Cache[parsedUrl]);
            }

            if (Context.DebugMode)
            {
                logger.Debug("Retrieving URL: {0}", parsedUrl);
            }

            // Try to grab the document
            try {
                WebGrabber grabber = new WebGrabber(parsedUrl);
                grabber.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                grabber.UserAgent         = parsedUserAgent;
                grabber.Encoding          = encoding;
                grabber.Timeout           = timeout;
                grabber.TimeoutIncrement  = timeoutIncrement;
                grabber.MaxRetries        = maxRetries;
                grabber.AllowUnsafeHeader = allowUnsafeHeader;
                grabber.CookieHeader      = cookies;
                grabber.AcceptLanguage    = acceptLanguage;
                grabber.Debug             = Context.DebugMode;

                // Keep session / chaining: merge any cookie value stored for this host.
                string sessionKey = "urn://scraper/header/" + grabber.Request.RequestUri.Host;
                string sessionCookies;
                // Single lookup instead of ContainsKey followed by the indexer.
                if (variables.TryGetValue(sessionKey, out sessionCookies))
                {
                    if (grabber.CookieHeader == null)
                    {
                        grabber.CookieHeader = sessionCookies;
                    }
                    else
                    {
                        grabber.CookieHeader = grabber.CookieHeader + "," + sessionCookies;
                    }
                }

                // Retrieve the document
                if (grabber.GetResponse())
                {
                    // save cookie session data for future requests
                    setVariable(variables, sessionKey, grabber.CookieHeader);

                    // grab the request results and store in our cache for later retrievals
                    pageContents = grabber.GetString();
                    if (_useCaching)
                    {
                        Context.Cache[parsedUrl] = pageContents;
                    }
                }
            }
            catch (ThreadAbortException) {
                // Let thread aborts propagate; "throw;" keeps the original stack trace
                // (the previous "throw e;" reset it).
                throw;
            }
            catch (Exception e) {
                // Best effort: log the failure and return whatever was collected (empty by default).
                logger.Warn("Could not connect to " + parsedUrl + ". " + e.Message);
            }

            return(pageContents);
        }