Ejemplo n.º 1
0
 /// <summary>
 /// Checks that <c>WebProcessor.GetFileNameFromUrl</c> yields the trailing file name of a url,
 /// and an empty string when the url ends with a slash.
 /// </summary>
 public void GetFileNameFromUrl()
 {
     const string expectedFileName = "blah.wav";

     // File nested below a directory.
     var fromNestedPath = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/Movies/blah.wav");
     Assert.AreEqual(expectedFileName, fromNestedPath);

     // File directly below the domain.
     var fromRootPath = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/blah.wav");
     Assert.AreEqual(expectedFileName, fromRootPath);

     // Trailing slash: there is no file name to return.
     var fromTrailingSlash = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/");
     Assert.AreEqual(string.Empty, fromTrailingSlash);

     // Known gap (deliberately not asserted): a bare domain without a trailing slash,
     // "https://www.moviewavs.com", does not currently come back as string.Empty.
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns the content for a url, preferring a copy already held in the file-system web object
        /// store and otherwise fetching it through <c>WebProcessor.GetUrlContents</c>.
        /// </summary>
        /// <param name="url">The url whose content is wanted; also used as the storage key.</param>
        /// <param name="header">Passed through to the fetch when nothing is stored.</param>
        /// <param name="userAgent">Passed through to the fetch when nothing is stored.</param>
        /// <param name="LogMessage">Passed through to the fetch when nothing is stored.</param>
        /// <returns>The stored content, or the fetched content (which may be empty).</returns>
        public string GetUrlContents(string url, string header, string userAgent, Functions.LogMessageDelegate LogMessage)
        {
            IWebObjectStorage store  = new WebObjectStorageFileSystem(_webObjectRoot);
            WebObject         stored = store.GetUrlObject(url);

            // Storage hit: no network round trip needed.
            if (stored != null)
            {
                return(stored.Content);
            }

            string fetched = WebProcessor.GetUrlContents(url, header, userAgent, LogMessage);

            // Empty results are handed back as-is and never written to the store.
            if (Functions.IsEmptyString(fetched))
            {
                return(fetched);
            }

            WebObject fresh = new WebObject();
            fresh.Url      = url;
            fresh.Content  = fetched;
            fresh.MimeType = "text/html";
            store.SetUrlObject(fresh);

            return(fetched);
        }
        /// <summary>
        /// Get the raw web search results from the Yahoo BOSS search endpoint.
        /// </summary>
        /// <param name="term">The plain search term.  Some keywords are appended to get better search results.</param>
        /// <param name="clientIp">Not used by this implementation.</param>
        /// <param name="resultsPageSize">Sent as the <c>count</c> query parameter.</param>
        /// <param name="pageToGet">Not used by this implementation; the request carries no paging offset.</param>
        /// <param name="LogMessage">Optional logging callback; may be null.</param>
        /// <returns>Whatever <c>WebProcessor.GetUrlContents</c> returns for the authenticated search url.</returns>
        public string GetSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
        {
            // Example request: http://yboss.yahooapis.com/ysearch/web?q=ipod
            const string baseAddr = @"http://yboss.yahooapis.com/ysearch/web";

            //
            // Note: Paging is supported in the request.  To ponder.
            //
            const string searchModifierKeywords = "sound clips wav mp3";

            string yahooSearchPhrase = string.Format("{0} {1}", term, searchModifierKeywords);

            //
            // Percent-encode the phrase before it goes into the query string.  Without this, a term
            // containing '&', '#', '+' or '=' would end the q parameter early or inject extra parameters.
            //
            string searchUrl = string.Format(@"{0}?q={1}&count={2}", baseAddr, Uri.EscapeDataString(yahooSearchPhrase), resultsPageSize);

            if (LogMessage != null)
            {
                LogMessage(string.Format("Searching yahoo for: \"{0}\"", searchUrl));
            }

            return(WebProcessor.GetUrlContents(BuildAuthenticatedUrl(new Uri(searchUrl)), @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage));
        }
        /// <summary>
        /// Get the search results JSON data from Google
        /// </summary>
        /// <param name="term">The plain search term.  Some keywords are added to get better search results.</param>
        /// <param name="clientIp">The client IP address; sent as the <c>userip</c> parameter (null is sent as empty)</param>
        /// <param name="resultsPageSize">The number of results per page (8 recommended)</param>
        /// <param name="pageToGet">The zero-based index of the page to get</param>
        /// <param name="LogMessage">Optional logging callback; may be null.</param>
        /// <returns>The raw JSON from Google.</returns>
        public string GetSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
        {
            //
            // Docs for google search:
            //
            // https://developers.google.com/web-search/docs/reference#_intro_fonje
            //
            const string baseAddr = @"https://ajax.googleapis.com/ajax/services/search/web";

            const string searchModifierKeywords = "sound clips wav mp3";

            // The API takes the index of the first result, not a page number.
            int startIndex = pageToGet * resultsPageSize;

            string googleSearchPhrase = string.Format("{0} {1}", term, searchModifierKeywords);

            //
            // Percent-encode the caller-supplied values before they go into the query string.  Without
            // this, a term containing '&', '#', '+' or '=' would end the q parameter early or inject
            // extra parameters.  A null clientIp is sent as an empty value, as string.Format did before.
            //
            string encodedPhrase   = System.Uri.EscapeDataString(googleSearchPhrase);
            string encodedClientIp = System.Uri.EscapeDataString(clientIp ?? string.Empty);

            string searchUrl = string.Format(@"{0}?q={1}&v=1.0&userip={2}&start={3}&rsz={4}", baseAddr, encodedPhrase, encodedClientIp, startIndex, resultsPageSize);

            if (LogMessage != null)
            {
                LogMessage(string.Format("Searching google for: \"{0}\"", searchUrl));
            }

            return(WebProcessor.GetUrlContents(searchUrl, @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage));
        }
        /// <summary>
        /// From a JSON results string (must be from GetSearchResults()), grab all the urls
        /// </summary>
        /// <param name="rawJsonResults">The JSON string</param>
        /// <returns>
        /// A list of dynamic objects, in result order.  The list is empty when the input is empty or
        /// when the JSON has no usable <c>responseData</c> object or no <c>results</c> entry.
        /// </returns>
        /// <remarks>
        /// Each item exposes:
        ///     Url = url["unescapedUrl"].ToString(),
        ///     Index = index,
        ///     Domain = WebProcessor.GetDomainOfUrl(url["unescapedUrl"].ToString())
        /// </remarks>
        public IList <dynamic> GetResultUrls(string rawJsonResults)
        {
            IList <dynamic> result = new List <dynamic>();

            if (Functions.IsEmptyString(rawJsonResults))
            {
                return(result);
            }

            JObject jsonResults = JObject.Parse(rawJsonResults);

            //
            // Guard against a reply whose "responseData" is missing or is not an object (for example
            // JSON null).  Indexing into it would otherwise throw instead of giving "no results".
            //
            var responseData = jsonResults["responseData"] as JObject;

            if (responseData == null)
            {
                return(result);
            }

            var results = responseData["results"];

            if (results == null)
            {
                return(result);
            }

            // The two-argument Select overload supplies each entry's position in the result set.
            var projected = results.Children().Select(
                (url, index) => new
                {
                    Url    = url["unescapedUrl"].ToString(),
                    Index  = index,
                    Domain = WebProcessor.GetDomainOfUrl(url["unescapedUrl"].ToString())
                });

            // Copy into the list so callers get a materialized collection with a Count.
            foreach (var item in projected)
            {
                result.Add(item);
            }

            return(result);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Decrypts and loads the cached data of a session, builds the post parameters and uploads
        /// the session's decrypted images together with those parameters.
        /// </summary>
        /// <param name="sessionId">Identifies the cached session to process.</param>
        /// <param name="appendData">
        /// When true the parameters are built with <c>AppendPostSessionParams</c> (using the current
        /// session and login data); otherwise with <c>CreatePostSessionParams</c>.
        /// </param>
        /// <param name="postParams">Receives the parameters built for the upload.</param>
        /// <returns>
        /// "EMPTY_FOLDER" when nothing was loaded and <paramref name="appendData"/> is false,
        /// "CONNECTION_FAIL" when the connection check fails, otherwise the result of
        /// <c>WebProcessor.UploadFileWithParams</c>.
        /// </returns>
        private string PrepareDataAndPostSession(string sessionId,
                                                 bool appendData, ref NameValueCollection postParams)
        {
            CacheProcessor.DecryptData(sessionId);
            List <string> sessionData = CacheProcessor.LoadData(sessionId);

            bool nothingLoaded = sessionData == null || sessionData.Count <= 0;

            // A session with no data is discarded, unless we are appending to an existing one.
            if (nothingLoaded && !appendData)
            {
                CacheProcessor.DeleteSessionFolder(sessionId);
                return("EMPTY_FOLDER");
            }

            CacheProcessor.CreateDecryptedImages(sessionId);
            List <string> imagePaths = new List <string>(CacheProcessor.GetDecryptedImagePaths(sessionId));

            if (!appendData)
            {
                postParams = _ServerApiProvider.CreatePostSessionParams(sessionData);
            }
            else
            {
                postParams = _ServerApiProvider.AppendPostSessionParams(sessionData,
                                                                        CurrentContext.GetInstance().Session, CurrentContext.GetInstance().LoginData);
            }

            string targetUrl = _ServerApiProvider.CreatePostSessionUrl();

            if (WebProcessor.CheckInternetConnection())
            {
                var screenshotFiles = _ServerApiProvider.PrepareFilesData(imagePaths, "screenshot_files[]", "image/jpeg");

                return(WebProcessor.UploadFileWithParams(targetUrl, screenshotFiles, postParams));
            }

            // Offline: start the error timer and report the failure to the caller.
            ErrorTimer.GetInstance().StartTimer();
            return("CONNECTION_FAIL");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds a <c>Frame</c> over a <c>WebProcessor</c> backed by a <c>ChromiumBrowserAdapter</c>
        /// and calls <c>FindText</c> on it.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the value returned by FindText is not asserted on, despite the test name;
        /// as written this only verifies that the call completes.
        /// </remarks>
        public void FindText_Frame_ReturnsRectangleWithTextPosition()
        {
            IBotBrowser  browser      = new ChromiumBrowserAdapter();
            WebProcessor webProcessor = new WebProcessor(browser);

            var context = new ProcessorValueContext
            {
                Processor = webProcessor
            };

            var frame = new Frame(context);

            frame.FindText("foo");
        }
        /// <summary>
        /// Runs an image search for a term against the Yahoo BOSS images endpoint, asking for
        /// "medium" dimensions.
        /// </summary>
        /// <param name="term">The search term; url-encoded before being placed in the query.</param>
        /// <param name="clientIp">Not used by this implementation.</param>
        /// <param name="resultsPageSize">Not used by this implementation.</param>
        /// <param name="pageToGet">Not used by this implementation.</param>
        /// <param name="LogMessage">Passed through to <c>WebProcessor.GetUrlContents</c>.</param>
        /// <returns>Whatever <c>WebProcessor.GetUrlContents</c> returns for the authenticated url.</returns>
        public string GetImageSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
        {
            const string dimensions = "medium";

            string encodedTerm    = HttpUtility.UrlEncode(term);
            string requestAddress = string.Format("http://yboss.yahooapis.com/ysearch/images?dimensions={0}&q={1}", dimensions, encodedTerm);

            // The endpoint needs an authenticated url; BuildAuthenticatedUrl produces it.
            string authenticatedUrl = BuildAuthenticatedUrl(new Uri(requestAddress));

            return(WebProcessor.GetUrlContents(authenticatedUrl, @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Asks the server for version information and stores the newest version and the
        /// mandatory-update value in the current context.
        /// </summary>
        /// <param name="email">Passed to <c>CreateVersionUrl</c> together with the current version.</param>
        /// <returns>
        /// True when version data was stored; false when the response analysis produced no data
        /// (null or an empty dictionary).
        /// </returns>
        public bool GetVersionData(string email)
        {
            string url = _ServerApiProvider.CreateVersionUrl(email,
                                                             CurrentContext.GetInstance().VersionData.CurrentVersion.ToString());

            string response = WebProcessor.MakeGetRequest(url);
            Dictionary <double, int> versionData = _ServerApiProvider.AnalizeVersionResponse(response);

            // An empty dictionary carries no version either; First() would throw on it, so it is
            // reported the same way as a null result.
            if (versionData == null || versionData.Count == 0)
            {
                return(false);
            }

            // Read key and value from the same entry: key is the version, value the mandatory-update value.
            KeyValuePair <double, int> newest = versionData.First();

            CurrentContext.GetInstance().VersionData.NewestVersion = newest.Key;
            CurrentContext.GetInstance().VersionData.MandatoryUpdate = newest.Value;
            return(true);
        }
Ejemplo n.º 10
0
        // AHMED EDIT

        /// <summary>
        /// Reads the saved e-mail from the registry, checks the internet connection and, when online,
        /// asks the version controller for version data and updates the version state accordingly.
        /// Any exception leaves the version state as UNKNOWN.
        /// </summary>
        private void GetSavedEmailAndDetectVersion()
        {
            try
            {
                VersionController.GetInstance();

                string savedEmail = string.Empty;
                RegistryProcessor.GetFromRegistry(CommonConst.REGISTRY_PATH,
                                                  CommonConst.EMAIL_VALUE_NAME, ref savedEmail, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);

                bool online = WebProcessor.CheckInternetConnection();

                if (!online)
                {
                    LogController.GetInstance().LogData(LogController.
                                                        GetInstance().LogFormat.GetNetworkLine("Connection Error"));
                    MessageBox.Show("Connection Error");
                    CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
                    IsInternetExists = false;
                    return;
                }

                IsInternetExists = true;

                // A missing e-mail is sent as an empty string.
                string emailForRequest = string.IsNullOrEmpty(savedEmail) ? string.Empty : savedEmail;

                CurrentContext.GetInstance().VersionData.VersionDetected =
                    VersionController.GetInstance().GetVersionData(emailForRequest);

                if (!CurrentContext.GetInstance().VersionData.VersionDetected)
                {
                    CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
                    return;
                }

                _new_version_lb.TextAlign = ContentAlignment.MiddleCenter;
                DetectVersionState();
            }
            catch
            {
                // Best effort: whatever went wrong, the version state is simply unknown.
                CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Timer tick handler: refreshes both local-time labels and, while the connection is marked
        /// as missing, re-checks it every <c>CommonConst.CHECK_CONNECTION_INTERVAL</c> ticks.  When
        /// the connection is back, version detection is run and the version UI is refreshed.
        /// </summary>
        private void _current_timer_tm_Tick(object sender, EventArgs e)
        {
            const string clockFormat = "Local Time: {0:hh:mm tt}";

            _local_time_al_lb.Text = string.Format(clockFormat, DateTime.Now);
            _local_time_lb.Text    = string.Format(clockFormat, DateTime.Now);

            // Nothing more to do while the connection is known to be up.
            if (IsInternetExists)
            {
                return;
            }

            InternetConnectionCount++;

            if (InternetConnectionCount < CommonConst.CHECK_CONNECTION_INTERVAL)
            {
                return;
            }

            InternetConnectionCount = 0;

            if (!WebProcessor.CheckInternetConnection())
            {
                IsInternetExists = false;
                return;
            }

            IsInternetExists = true;

            try
            {
                CurrentContext.GetInstance().VersionData.VersionDetected =
                    VersionController.GetInstance().GetVersionData(string.Empty);

                if (!CurrentContext.GetInstance().VersionData.VersionDetected)
                {
                    CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
                }
                else
                {
                    _new_version_lb.TextAlign = ContentAlignment.MiddleCenter;
                    DetectVersionState();
                }

                SetVersionButton();
                ConfVersionPanel();
            }
            catch
            {
                // Best effort: a failed detection just leaves the version state unknown.
                CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Sends a login request to the server and, on success, records the credentials in the
        /// current context's login data.
        /// </summary>
        /// <param name="login">The login name; stored in the context only when the login succeeds.</param>
        /// <param name="password">The password; stored in the context only when the login succeeds.</param>
        /// <returns>The state produced by <c>AnalizeLoginSuccess</c> for the server response.</returns>
        public LoginState Login(string login, string password)
        {
            // Create the API provider lazily, on first use.
            if (_ServerApiProvider == null)
            {
                _ServerApiProvider = ServerApiFactory.CreateServerApiProvider();
            }

            // Every attempt starts from fresh login data.
            CurrentContext.GetInstance().LoginData = LoginDataFactory.CreateLoginData();

            string loginUrl    = _ServerApiProvider.CreateLoginUrl(login, password);
            string serverReply = WebProcessor.MakeGetRequest(loginUrl);

            var replyLogLine = LogController.GetInstance().LogFormat.GetNetworkLine(string.Format("server reply \"{0}\"", serverReply));
            LogController.GetInstance().LogData(replyLogLine);

            LoginState outcome = _ServerApiProvider.AnalizeLoginSuccess(serverReply);

            if (outcome != LoginState.LOGGED)
            {
                return(outcome);
            }

            CurrentContext.GetInstance().LoginData.Login = login;
            CurrentContext.GetInstance().LoginData.Password = password;

            return(outcome);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Timer callback that re-checks the internet connection.  On success the handler is
        /// notified and the timer stays stopped; on failure the timer is restarted with the next
        /// interval from <c>_Times</c> (the last entry is reused once the attempts run past the end).
        /// </summary>
        private void OnInternalTimedEvent(object source, ElapsedEventArgs e)
        {
            _Count++;

            bool   online     = WebProcessor.CheckInternetConnection();
            string statusText = online ? "Exp. Internet Connection OK" : "Exp. Internet Connection Fail";

            LogController.GetInstance().LogData(LogController.
                                                GetInstance().LogFormat.GetNetworkLine(statusText));

            // The timer is stopped in both cases; only the failure path starts it again.
            _InternalTimer.Stop();

            if (online)
            {
                _Handler.ConnectionStateChanged(true);
                _IsStarted = false;
                _Handler.CheckConnectionEventFired();
                return;
            }

            // Pick the interval for this attempt, clamped to the last configured one.
            var slot = _Count < _Times.Length ? _Count : _Times.Length - 1;

            _InternalTimer.Interval = _Times[slot];
            _InternalTimer.Start();
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Resolving <c>pathRelativeToPage</c> against <c>domainAndPage</c> is expected to give
 /// <c>pathFullDomainWithDir</c>.
 /// </summary>
 public void GetUrlForObject_domainAndPage_pathRelativeToPage()
 {
     var actual = WebProcessor.GetUrlForObject(domainAndPage, pathRelativeToPage);

     Assert.AreEqual(pathFullDomainWithDir, actual);
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Login button handler.  Validates that both fields are filled, then performs the connection
        /// check and the login on a thread-pool thread, marshalling all UI work back through
        /// <c>Invoke</c>.  On success the credentials are saved to the registry and the main form is shown.
        /// </summary>
        /// <remarks>
        /// <c>IsSessionDataLocked</c> is taken on the UI thread before the work item is queued, so a
        /// second click cannot start a second login while one is pending.  It is released in a
        /// <c>finally</c> block so an exception in the worker cannot leave it set.
        /// </remarks>
        private void _login_btn_Click(object sender, EventArgs e)
        {
            // Read the text boxes once, here on the UI thread; the worker below must not touch controls directly.
            string username = _username_Tb.Text;
            string password = _password_Tb.Text;

            if (string.IsNullOrEmpty(username) || string.IsNullOrEmpty(password))
            {
                LogController.GetInstance().LogData(LogController.
                                                    GetInstance().LogFormat.GetLoginFailedLine("Incorrect username or password"));
                MessageBox.Show("Login or password is empty");
                return;
            }

            // Another operation holds the session data; ignore this click.
            if (CurrentContext.GetInstance().IsSessionDataLocked)
            {
                return;
            }

            // Take the lock before queueing, so the check above and this assignment happen in the same UI-thread turn.
            CurrentContext.GetInstance().IsSessionDataLocked = true;

            ThreadPool.QueueUserWorkItem(new WaitCallback((s) =>
            {
                try
                {
                    if (!WebProcessor.CheckInternetConnection())
                    {
                        LogController.GetInstance().LogData(LogController.
                                                            GetInstance().LogFormat.GetLoginFailedLine("Network unavailable"));
                        this.Invoke((MethodInvoker) delegate()
                        {
                            MessageBox.Show("Network unavailable");
                        });

                        return;
                    }

                    LoginState loginState = LoginDataController.GetInstance().Login(username, password);

                    if (loginState == LoginState.LOGGED)
                    {
                        this.Invoke((MethodInvoker) delegate()
                        {
                            LogController.GetInstance().LogData(LogController.
                                                                GetInstance().LogFormat.GetLoginSuccessLine());

                            // Remember the e-mail for the next start.
                            RegistryProcessor.SetToRegistry(CommonConst.REGISTRY_PATH,
                                                            CommonConst.EMAIL_VALUE_NAME, CurrentContext.GetInstance().LoginData.Login,
                                                            RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);

                            // Store the password only in encrypted form, and only if encryption produced something.
                            string pwd = CurrentContext.GetInstance().LoginData.Password;
                            pwd        = StringCipher.Encrypt(pwd, CommonConst.DES_KEY);
                            if (!string.IsNullOrEmpty(pwd))
                            {
                                RegistryProcessor.SetToRegistry(CommonConst.REGISTRY_PATH,
                                                                CommonConst.PWD_VALUE_NAME, pwd, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);
                            }

                            // Show the main form at this window's position and size, then hide this window.
                            MainForm mainForm = new MainForm();
                            mainForm.Show();
                            mainForm.Visible    = false;
                            mainForm.Left       = this.Left;
                            mainForm.Top        = this.Top;
                            mainForm.Size       = this.Size;
                            mainForm.Visible    = true;
                            IsHide              = true;
                            _balloon_ni.Visible = false;
                            this.Hide();
                            LogController.GetInstance().LogData(LogController.
                                                                GetInstance().LogFormat.GetNavigationLine("Main page"));
                            LogController.GetInstance().RemoveLogHandler(this);
                        });
                    }
                    else
                    {
                        this.Invoke((MethodInvoker) delegate()
                        {
                            if (loginState == LoginState.CONNECTION_FAIL)
                            {
                                LogController.GetInstance().LogData(LogController.
                                                                    GetInstance().LogFormat.GetLoginFailedLine("Web server did not responded"));
                                MessageBox.Show("Connection Error");
                            }
                            else if (loginState == LoginState.LOGIN_FAIL)
                            {
                                LogController.GetInstance().LogData(LogController.
                                                                    GetInstance().LogFormat.GetLoginFailedLine("Incorrect username or password"));
                                MessageBox.Show("Login Error");
                            }
                        });
                    }
                }
                finally
                {
                    // Always release the lock, including when the login or a UI callback throws.
                    CurrentContext.GetInstance().IsSessionDataLocked = false;
                }
            }));
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Uploads a bug report, together with the activity log file, to the bug report url.
        /// </summary>
        /// <param name="report">The report text; sent as the "body" field.</param>
        /// <param name="isCrashReport">When true, a "crash_report" field with value "1" is added.</param>
        /// <returns>
        /// "Internet Connection Error" when the connection check fails, otherwise the server reply
        /// returned by <c>WebProcessor.UploadFileWithParams</c>.
        /// </returns>
        public string SendBugReport(string report, bool isCrashReport)
        {
            if (!WebProcessor.CheckInternetConnection())
            {
                return("Internet Connection Error");
            }

            string reportUrl = SessionController.GetInstance().ServerApiProvider.CreateBugReportUrl();

            // Sender: the logged-in user if there is one, else the e-mail saved in the registry,
            // else the placeholder address.
            string sender = "*****@*****.**";

            bool hasLogin = CurrentContext.GetInstance().LoginData != null &&
                            !string.IsNullOrEmpty(CurrentContext.GetInstance().LoginData.Login);

            if (hasLogin)
            {
                sender = CurrentContext.GetInstance().LoginData.Login;
            }
            else
            {
                string savedEmail = string.Empty;
                RegistryProcessor.GetFromRegistry(CommonConst.REGISTRY_PATH,
                                                  CommonConst.EMAIL_VALUE_NAME, ref savedEmail, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);
                if (!string.IsNullOrEmpty(savedEmail))
                {
                    sender = savedEmail;
                }
            }

            // Version: fall back to "1.00" when no version data is available.
            string trackerVersion = CurrentContext.GetInstance().VersionData != null
                ? CurrentContext.GetInstance().VersionData.CurrentVersion.ToString()
                : "1.00";

            string token   = CommonConst.BUG_REPORT_TOKEN;
            string logFile = LogController.GetInstance().IsLogFileExists();

            NameValueCollection fields = new NameValueCollection
            {
                { "from", sender },
                { "body", report },
                { "tracker_version", trackerVersion },
                { "token", token }
            };

            if (isCrashReport)
            {
                fields.Add("crash_report", "1");
            }

            List <string> attachments = new List <string> { logFile };

            string reply = WebProcessor.UploadFileWithParams(reportUrl,
                                                             SessionController.GetInstance().ServerApiProvider.
                                                             PrepareFilesData(attachments, "activity_log", "text/plain"), fields);

            LogController.GetInstance().LogData(LogController.
                                                GetInstance().LogFormat.GetNavigationLine("Server reply: " + reply));

            return(reply);
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Resolving <c>pathRelativeToPage</c> against <c>domainEndingSlash</c> is expected to give
 /// <c>pathFullDomain</c>.
 /// </summary>
 public void GetUrlForObject_domainEndingSlash_pathRelativeToPage()
 {
     var actual = WebProcessor.GetUrlForObject(domainEndingSlash, pathRelativeToPage);

     Assert.AreEqual(pathFullDomain, actual);
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Resolving <c>pathFullDomain</c> against <c>domainAndDirNoEndingSlash</c> is expected to give
 /// <c>pathFullDomain</c> back unchanged.
 /// </summary>
 public void GetUrlForObject_domainAndDirNoEndingSlash_pathFullDomain()
 {
     var actual = WebProcessor.GetUrlForObject(domainAndDirNoEndingSlash, pathFullDomain);

     Assert.AreEqual(pathFullDomain, actual);
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Grab a potential websoundsearch object at a url
        /// </summary>
        /// <param name="url">The url of the object</param>
        /// <param name="header">Additional header to include, if any</param>
        /// <param name="userAgent">The user agent to use, if any</param>
        /// <returns>
        /// A websearchsound object, with the properties populated if it's really a sound.  An object
        /// that is too big (by its declared length, or once the body has been read past the limit)
        /// comes back as an unpopulated instance.
        /// </returns>
        /// <remarks>
        /// Errors while requesting or reading the response are logged, not thrown.  Creating the
        /// request itself happens outside the try block, so a url the request factory rejects still throws.
        /// </remarks>
        private static websearchsound GetWebObjectAtUrl(string url, string header, string userAgent)
        {
            websearchsound result = new websearchsound();

            const long MAX_SOUND_SIZE_BYTES = 1024 * 1000;  // Let's cap at 1 MB

            var request = (HttpWebRequest)HttpWebRequest.Create(url);

            request.Timeout = 5000;                         // If it takes longer than 5 seconds to respond, we're in trouble.  Let's bail

            if (!Functions.IsEmptyString(header))
            {
                request.Headers.Add(header);
            }

            if (!Functions.IsEmptyString(userAgent))
            {
                request.UserAgent = userAgent;
            }

            HttpWebResponse response = null;

            try
            {
                response = (HttpWebResponse)request.GetResponse();

                // Invariant lower-casing: mime types are protocol tokens, not text in the user's culture.
                string foundContentType = response.ContentType.ToLowerInvariant();
                long   responseSize     = response.ContentLength;

                if (responseSize > MAX_SOUND_SIZE_BYTES)
                {
                    LogMessage(string.Format("Won't download, too big: {0} (limit is {1})", responseSize, MAX_SOUND_SIZE_BYTES));
                    return(result);
                }

                result.contenttype = foundContentType;
                string outputExt = GetExtensionFromMimeType(foundContentType);
                string fileName  = WebProcessor.GetFileNameFromUrl(url);

                result.issound   = outputExt != string.Empty;
                result.filename  = fileName;
                result.extension = outputExt;

                if (!result.issound)
                {
                    LogMessage(string.Format("Object at \"{0}\" not a sound, has mime type of \"{1}\"", url, foundContentType));
                    return(result);
                }

                try
                {
                    using (Stream body = response.GetResponseStream())
                    using (var memStream = new MemoryStream())
                    {
                        //
                        // Copy with a running limit.  The declared length checked above is -1 when the
                        // server does not announce one, so the cap has to be enforced while reading too.
                        //
                        var  chunk     = new byte[8192];
                        bool overLimit = false;
                        int  read;

                        while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                        {
                            if (memStream.Length + read > MAX_SOUND_SIZE_BYTES)
                            {
                                overLimit = true;
                                break;
                            }

                            memStream.Write(chunk, 0, read);
                        }

                        if (overLimit)
                        {
                            LogMessage(string.Format("Won't download, too big: body of \"{0}\" exceeds limit of {1}", url, MAX_SOUND_SIZE_BYTES));

                            // Same outcome as the declared-length check: hand back an unpopulated object.
                            result = new websearchsound();
                        }
                        else
                        {
                            result.soundbytes = memStream.ToArray();
                            result.size       = memStream.Length;
                        }
                    }
                }
                catch (Exception ex)
                {
                    LogMessage(string.Format("Exception getting sound bytes for file \"{0}\", was: {1}", result.filename, ex.Message));
                }

                //
                // Don't clog log up with successes, we're worried about the errors
                //
            }
            catch (Exception ex)
            {
                // Crud.
                LogMessage(string.Format("Error doing stuff with file \"{0}\", was: {1}", url, ex.Message));
            }
            finally
            {
                if (response != null)
                {
                    response.Close();
                }
            }

            return(result);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Get all the sounds in the list of urls from the passed IDataSource and add the sounds to the passed websearch
        /// </summary>
        /// <param name="urls">The list of urls</param>
        /// <param name="dataSource">The datasource to use to look for sounds</param>
        /// <param name="currentSearch">The current search</param>
        private static void GetSoundsOnPages(IList <dynamic> urls, IDataSource dataSource, websearch currentSearch, IList <websearchsound> searchResultList, Functions.LogMessageDelegate LogMessage, int maxDepthToFollow)
        {
            //const int MAX_URLS_TO_SEARCH = 20;
            const int MAX_SOUNDS_PER_URL = 150;
            //int urlsProcessed = 0;
            HashSet <string> urlsOfObjectsSearched = new HashSet <string>();

            //
            // Multithreading here for requesting the pages works pretty well speed-wise.  Unfortunately, the regexes bog down the
            // server so badly that it becomes unresponsive for other users.  So, don't do parallel on this outside loop.
            //
            // However, once the first page is processed, the sounds are webrequested asynchronously.  So, the next page will
            // start being processed while the first page's sounds are still being downloaded.  This works quite well, and
            // the performance is just about the same.  So, let's stick with that.
            //

            foreach (dynamic url in urls)
            {
                string theUrl = url.Url;
                string domain = WebProcessor.GetDomainOfUrl(theUrl);

                if (unprocessableDomains.Contains(domain))
                {
                    LogMessage(string.Format("Skipping crappy domain: {0}", domain));
                }
                else
                {
                    LogMessage(string.Format("About to search for sounds on page: \"{0}\"", theUrl));

                    // string pageContent = WebProcessor.GetUrlContents(theUrl, null, null, LogMessage);

                    //
                    // todo: test this, make sure it works
                    //
                    string pageContent = dataSource.GetUrlContents(theUrl, null, GetUserAgent(), LogMessage);

                    bool wasAborted = false;

                    //
                    // todo: combine sound links func with above function
                    //
                    IList <string> linksOnPage = GetSoundLinksOnPage(pageContent, ref wasAborted);

                    //
                    // For generating test case files, set breakpoint on if (wasAborted) below with condition:
                    //
                    // maxDepthToFollow == 1
                    //

                    if (wasAborted)
                    {
                        LogMessage(string.Format("Had to abort link search on domain: {0}", domain));

                        lock (unprocessableDomains)
                        {
                            unprocessableDomains.Add(domain);
                        }
                    }

                    LogMessage(string.Format("Found {0} links on \"{1}\"", linksOnPage.Count, theUrl));


#if MULTITHREADED
                    Parallel.ForEach <string>(linksOnPage.Take(MAX_SOUNDS_PER_URL), partialLink => // <=-- normal operation - multithreaded
#else
                    foreach (string partialLink in linksOnPage.Take(MAX_SOUNDS_PER_URL))           // <=-- for debugging stuff, it's easier when not multithreaded
#endif
                    {
                        string soundLink = WebProcessor.GetUrlForObject(theUrl, partialLink);

                        LogMessage(string.Format("About to grab a potential sound here: \"{0}\"", soundLink));

                        if (!unprocessableDomains.Contains(domain) && IsNewSoundToGrab(urlsOfObjectsSearched, soundLink))
                        {
                            websearchsound receivedObject = GetWebObjectAtUrl(soundLink, null, null);

                            //
                            // enhanced search: if not a sound and is text/html and response code is 200, search for sounds on THAT page
                            //

                            if (receivedObject.issound)
                            {
                                receivedObject.sourceurl         = theUrl;
                                receivedObject.sourceDomain      = domain;
                                receivedObject.searchResultOrder = url.Index;

                                //
                                // Check for dups
                                //
                                string md5Hash = Functions.GetMd5Hash(receivedObject.soundbytes);

                                if (!HaveMd5ForSound(dataSource.CurrentSoundMd5s, md5Hash))
                                {
                                    dataSource.SetSoundInSearch(currentSearch, receivedObject);

                                    //
                                    // Performance optimization: we're not going to return the sound data itself with the search
                                    // so let's free up the mem here
                                    //
                                    receivedObject.soundbytes = null;

                                    searchResultList.Add(receivedObject);
                                }
                                else
                                {
                                    LogMessage("Not adding sound - already in collection");
                                }
                            }
                            else if (receivedObject.contenttype.ToLower().StartsWith("text/html"))
                            {
                                //
                                // We have another HTML page.  Check that too?
                                //
                                if (maxDepthToFollow > 0)
                                {
                                    LogMessage(string.Format("Going to drill down in this page - we're at max level: {0}", maxDepthToFollow));
                                    GetSoundsOnPages(new List <dynamic>()
                                    {
                                        new { Url = soundLink, Index = url.Index }
                                    }, dataSource, currentSearch, searchResultList, LogMessage, maxDepthToFollow - 1);
                                }
                                else
                                {
                                    LogMessage(string.Format("No more drilling down, we're as low as we can go"));
                                }
                            }
                        }
                        else
                        {
                            LogMessage("Won't process: already had sound from that url, or the domain is unprocessable!");
                        }
#if MULTITHREADED
                    });
Ejemplo n.º 21
0
 /// <summary>
 /// Checks that <c>WebProcessor.GetUrlForObject</c>, when given the
 /// <c>domainNoEndingSlash</c> and <c>externalLink</c> fixtures, yields
 /// <c>externalLink</c> itself.
 /// </summary>
 public void GetUrlForObject_domainNoEndingSlash_externalLink()
 {
     // Act
     var resolved = WebProcessor.GetUrlForObject(domainNoEndingSlash, externalLink);

     // Assert
     Assert.AreEqual(externalLink, resolved);
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Runs the crawl loop: repeatedly dequeues a webpage URL, crawls it, builds a
        /// <c>WebPage</c> from what the crawler released, and publishes the result to the
        /// hub and to both display queues.
        /// </summary>
        /// <remarks>
        /// The loop keeps going while the number of traversed URLs is below
        /// <c>maxPageSearchLimit</c>, <c>callable</c> is true, and the queue is not empty.
        /// One additional secondary display view is enqueued after the loop finishes.
        /// </remarks>
        private void explore()
        {
            var helper      = new WebUtils();
            var urlCache    = new WebCache();
            var pageCrawler = new WebCrawler();
            var hostPolicy  = new WebHostPolicy();
            var pageBuilder = new WebProcessor(configuredSettings);
            var throttle    = new ThreadSleeper(5000); // NOTE(review): presumably milliseconds - confirm

            // Initialise the queue (from currentUrl) only when it reports no entries yet.
            if (sizeOfQueue() < 1)
            {
                initQueue(urlCache, currentUrl);
            }

            while (amountOfWebpageUrlsTraversed() < maxPageSearchLimit && callable && !isQueueEmpty())
            {
                string nextUrl = dequeueWebpageUrl(traversalStyle, urlCache);

                // Only continue with URLs that are valid and have not been visited before.
                bool isUnvisitedValidUrl = helper.isValidWebpageURL(nextUrl) && !hasWebpageUrlBeenVisied(nextUrl);
                if (!isUnvisitedValidUrl)
                {
                    continue;
                }

                // Give the host policy a chance to apply a timeout before crawling.
                handlePotentialTimeout(hostPolicy, helper, nextUrl);

                // A failed crawl attempt means the URL is unsupported; move on to the next one.
                if (!pageCrawler.tryCrawl(nextUrl))
                {
                    continue;
                }

                setWebpageUrlAsVisited(nextUrl);

                // Collect everything the crawler gathered for this page.
                Queue<string> pageTexts  = pageCrawler.releaseTexts();
                Queue<string> foundPages = pageCrawler.releaseWebpages();
                Queue<string> foundImage = pageCrawler.releaseImages();
                string        pageHost   = pageCrawler.releaseHost();

                // Build the page object first, then run the basic filter over the texts.
                WebPage builtPage = pageBuilder.constructWebsite(pageTexts, foundPages, foundImage, nextUrl, pageHost);
                pageBuilder.tryBasicFilter(pageTexts);

                // Queue the newly discovered URLs, then hand the page to the hub.
                addWebpageUrlsToQueue(urlCache, builtPage, foundPages, foundImage);
                sendToHub(builtPage);

                // Record this page's contribution in the shared search context.
                sharedSearchContext.getContextInfo().addToThreadScore(contextualId, builtPage.getSearchPhraseCount());
                sharedSearchContext.getContextInfo().incrementUrlsTraversed();

                // Construct the primary display entry for the end user.
                mainDisplayQueue.Enqueue(helper.createPrimaryDisplayView(builtPage, contextualId));

                // Construct the secondary display entry for the end user.
                secondaryDisplayQueue.Enqueue(helper.createSecondaryDisplayView(sharedSearchContext));

                // Register/update the host in the policy, then let the sleeper pause if it decides to.
                addOrUpdatePolicy(hostPolicy, pageHost);
                throttle.trySleeping();
            }

            // Publish one last secondary view once the loop has ended.
            secondaryDisplayQueue.Enqueue(helper.createSecondaryDisplayView(sharedSearchContext));
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Checks that <c>WebProcessor.GetUrlForObject</c>, when given the
 /// <c>domainAndPage</c> and <c>pathFullDomain</c> fixtures, yields
 /// <c>pathFullDomain</c> itself.
 /// </summary>
 public void GetUrlForObject_domainAndPage_pathFullDomain()
 {
     // Act
     var resolved = WebProcessor.GetUrlForObject(domainAndPage, pathFullDomain);

     // Assert
     Assert.AreEqual(pathFullDomain, resolved);
 }