/// <summary>
/// Verifies the file name WebProcessor.GetFileNameFromUrl extracts from a URL with a
/// directory, a URL without a directory, and a URL that ends in a slash.
/// </summary>
public void GetFileNameFromUrl()
{
    const string expectedFileName = "blah.wav";

    string fromNestedPath = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/Movies/blah.wav");
    Assert.AreEqual(expectedFileName, fromNestedPath);

    string fromRootPath = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/blah.wav");
    Assert.AreEqual(expectedFileName, fromRootPath);

    string fromTrailingSlash = WebProcessor.GetFileNameFromUrl("https://www.moviewavs.com/");
    Assert.AreEqual(string.Empty, fromTrailingSlash);

    // Eh, this doesn't work but no biggie (bare domain without a trailing slash):
    // Assert.AreEqual(string.Empty, WebSearchManager.GetFileNameFromUrl("https://www.moviewavs.com"));
}
/// <summary>
/// Returns the content of a URL. A stored copy from the file-system web object storage is
/// used when one exists; otherwise the content is downloaded and, when non-empty, stored.
/// </summary>
/// <param name="url">The url to fetch</param>
/// <param name="header">Additional header passed through to the download, if any</param>
/// <param name="userAgent">The user agent passed through to the download, if any</param>
/// <param name="LogMessage">Logging callback passed through to the download</param>
/// <returns>The stored or downloaded content.</returns>
public string GetUrlContents(string url, string header, string userAgent, Functions.LogMessageDelegate LogMessage)
{
    IWebObjectStorage storage = new WebObjectStorageFileSystem(_webObjectRoot);

    WebObject storedObject = storage.GetUrlObject(url);
    if (storedObject != null)
    {
        return storedObject.Content;
    }

    string downloaded = WebProcessor.GetUrlContents(url, header, userAgent, LogMessage);
    if (Functions.IsEmptyString(downloaded))
    {
        // Nothing worth storing; hand back whatever the download produced.
        return downloaded;
    }

    WebObject toStore = new WebObject();
    toStore.Url = url;
    toStore.Content = downloaded;
    toStore.MimeType = "text/html";
    storage.SetUrlObject(toStore);

    return downloaded;
}
/// <summary>
/// Get the raw search results from the Yahoo BOSS web search endpoint.
/// </summary>
/// <param name="term">The plain search term. Some keywords are added to get better search results.</param>
/// <param name="clientIp">The client IP address (not sent with this request)</param>
/// <param name="resultsPageSize">The number of results to ask for (sent as "count")</param>
/// <param name="pageToGet">The zero-based index of the page to get (paging is not sent with this request yet)</param>
/// <param name="LogMessage">Optional logging callback; may be null</param>
/// <returns>The raw response body returned by WebProcessor.GetUrlContents.</returns>
public string GetSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
{
    // Example request: http://yboss.yahooapis.com/ysearch/web?q=ipod
    const string baseAddr = @"http://yboss.yahooapis.com/ysearch/web";

    //
    // Note: Paging is supported in the request. To ponder.
    //
    const string searchModifierKeywords = "sound clips wav mp3";

    string yahooSearchPhrase = string.Format("{0} {1}", term, searchModifierKeywords);

    // Escape the phrase so characters such as '&', '#', '+' or '=' in the user's term
    // stay part of the q= value instead of corrupting the query string.
    string searchUrl = string.Format(@"{0}?q={1}&count={2}", baseAddr, Uri.EscapeDataString(yahooSearchPhrase), resultsPageSize);

    if (LogMessage != null)
    {
        LogMessage(string.Format("Searching yahoo for: \"{0}\"", searchUrl));
    }

    return WebProcessor.GetUrlContents(BuildAuthenticatedUrl(new Uri(searchUrl)), @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage);
}
/// <summary>
/// Get the search results JSON data from Google
/// </summary>
/// <param name="term">The plain search term. Some keywords are added to get better search results.</param>
/// <param name="clientIp">The client IP address</param>
/// <param name="resultsPageSize">The number of results per page (8 recommended)</param>
/// <param name="pageToGet">The zero-based index of the page to get</param>
/// <param name="LogMessage">Optional logging callback; may be null</param>
/// <returns>The raw JSON from Google.</returns>
public string GetSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
{
    //
    // Docs for google search:
    //
    // https://developers.google.com/web-search/docs/reference#_intro_fonje
    //
    const string baseAddr = @"https://ajax.googleapis.com/ajax/services/search/web";

    //
    // Note: Paging is supported in the request. To ponder.
    //
    const string searchModifierKeywords = "sound clips wav mp3";

    int startIndex = pageToGet * resultsPageSize;

    string googleSearchPhrase = string.Format("{0} {1}", term, searchModifierKeywords);

    // Escape the caller-supplied values so characters such as '&', '#', '+' or '=' cannot
    // break out of their query parameter. A null clientIp is sent as an empty value, as before.
    string escapedPhrase = System.Uri.EscapeDataString(googleSearchPhrase);
    string escapedClientIp = System.Uri.EscapeDataString(clientIp ?? string.Empty);

    string searchUrl = string.Format(@"{0}?q={1}&v=1.0&userip={2}&start={3}&rsz={4}", baseAddr, escapedPhrase, escapedClientIp, startIndex, resultsPageSize);

    if (LogMessage != null)
    {
        LogMessage(string.Format("Searching google for: \"{0}\"", searchUrl));
    }

    return WebProcessor.GetUrlContents(searchUrl, @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage);
}
/// <summary>
/// From a JSON results string (must be from GetSearchResults()), grab all the urls
/// </summary>
/// <param name="rawJsonResults">The JSON string</param>
/// <returns>
/// A list of dynamic objects; empty when the input is empty or the payload carries no
/// "responseData"/"results" section.
/// </returns>
/// <remarks>
/// Each item has:
///     Url    = url["unescapedUrl"].ToString(),
///     Index  = zero-based position of the entry within "results",
///     Domain = WebProcessor.GetDomainOfUrl(url["unescapedUrl"].ToString())
/// </remarks>
public IList<dynamic> GetResultUrls(string rawJsonResults)
{
    IList<dynamic> result = new List<dynamic>();

    if (Functions.IsEmptyString(rawJsonResults))
    {
        return result;
    }

    JObject jsonResults = JObject.Parse(rawJsonResults);

    // "responseData" may be absent or a JSON null (in which case it is not a JObject and
    // cannot be indexed), so only continue when it really is an object.
    JObject responseData = jsonResults["responseData"] as JObject;
    if (responseData == null)
    {
        return result;
    }

    JToken results = responseData["results"];
    if (results == null)
    {
        return result;
    }

    int index = 0;
    foreach (JToken url in results.Children())
    {
        string unescapedUrl = url["unescapedUrl"].ToString();

        result.Add(new
        {
            Url = unescapedUrl,
            Index = index,
            Domain = WebProcessor.GetDomainOfUrl(unescapedUrl)
        });

        index++;
    }

    return result;
}
/// <summary>
/// Decrypts and loads the cached data of a session, builds the post parameters and uploads
/// the decrypted screenshots together with those parameters.
/// </summary>
/// <param name="sessionId">Identifier of the cached session to send</param>
/// <param name="appendData">True to build "append" parameters, false to build "create" parameters</param>
/// <param name="postParams">Receives the parameters that were built for the upload</param>
/// <returns>
/// "EMPTY_FOLDER" when nothing was loaded and appendData is false (the session folder is deleted),
/// "CONNECTION_FAIL" when the connection check fails (the error timer is started),
/// otherwise the result of WebProcessor.UploadFileWithParams.
/// </returns>
private string PrepareDataAndPostSession(string sessionId, bool appendData, ref NameValueCollection postParams)
{
    CacheProcessor.DecryptData(sessionId);
    List<string> sessionData = CacheProcessor.LoadData(sessionId);

    bool nothingLoaded = sessionData == null || sessionData.Count <= 0;
    if (nothingLoaded && !appendData)
    {
        CacheProcessor.DeleteSessionFolder(sessionId);
        return "EMPTY_FOLDER";
    }

    CacheProcessor.CreateDecryptedImages(sessionId);
    List<string> screenshotPaths = new List<string>(CacheProcessor.GetDecryptedImagePaths(sessionId));

    if (!appendData)
    {
        postParams = _ServerApiProvider.CreatePostSessionParams(sessionData);
    }
    else
    {
        postParams = _ServerApiProvider.AppendPostSessionParams(
            sessionData,
            CurrentContext.GetInstance().Session,
            CurrentContext.GetInstance().LoginData);
    }

    string postUrl = _ServerApiProvider.CreatePostSessionUrl();

    if (WebProcessor.CheckInternetConnection())
    {
        return WebProcessor.UploadFileWithParams(
            postUrl,
            _ServerApiProvider.PrepareFilesData(screenshotPaths, "screenshot_files[]", "image/jpeg"),
            postParams);
    }

    ErrorTimer.GetInstance().StartTimer();
    return "CONNECTION_FAIL";
}
/// <summary>
/// Builds a Frame on top of a Chromium-backed WebProcessor and calls FindText on it.
/// No assertion is made on the result; the call only has to complete.
/// </summary>
public void FindText_Frame_ReturnsRectangleWithTextPosition()
{
    IBotBrowser browser = new ChromiumBrowserAdapter();
    WebProcessor webProcessor = new WebProcessor(browser);

    ProcessorValueContext context = new ProcessorValueContext
    {
        Processor = webProcessor
    };

    Frame frame = new Frame(context);

    frame.FindText("foo");
}
/// <summary>
/// Get the raw image search results for a term from the Yahoo BOSS image endpoint,
/// asking for "medium" sized images.
/// </summary>
/// <param name="term">The search term; it is URL-encoded before being sent</param>
/// <param name="clientIp">Not used by this request</param>
/// <param name="resultsPageSize">Not used by this request</param>
/// <param name="pageToGet">Not used by this request</param>
/// <param name="LogMessage">Logging callback passed through to the download</param>
/// <returns>The raw response body returned by WebProcessor.GetUrlContents.</returns>
public string GetImageSearchResults(string term, string clientIp, int resultsPageSize, int pageToGet, Functions.LogMessageDelegate LogMessage)
{
    //
    // Let's search for this bad boy!
    //
    const string size = "medium";

    string requestUrl = string.Format("http://yboss.yahooapis.com/ysearch/images?dimensions={0}&q={1}", size, HttpUtility.UrlEncode(term));
    string searchUrl = BuildAuthenticatedUrl(new Uri(requestUrl));

    return WebProcessor.GetUrlContents(searchUrl, @"referrer:http://www.otamata.com", @"OtamataSoundSearchService", LogMessage);
}
/// <summary>
/// Asks the server for version information and stores the first reported entry in the
/// current context (key as NewestVersion, value as MandatoryUpdate).
/// </summary>
/// <param name="email">The email passed to the version url builder</param>
/// <returns>
/// True when version data was received and stored; false when the response could not be
/// analysed or contained no entries.
/// </returns>
public bool GetVersionData(string email)
{
    string url = _ServerApiProvider.CreateVersionUrl(email, CurrentContext.GetInstance().VersionData.CurrentVersion.ToString());
    string response = WebProcessor.MakeGetRequest(url);

    Dictionary<double, int> versionData = _ServerApiProvider.AnalizeVersionResponse(response);

    // An empty dictionary carries no version either; First() would throw on it.
    if (versionData == null || versionData.Count == 0)
    {
        return false;
    }

    // Read key and value from the same entry in a single enumeration.
    KeyValuePair<double, int> firstEntry = versionData.First();

    CurrentContext.GetInstance().VersionData.NewestVersion = firstEntry.Key;
    CurrentContext.GetInstance().VersionData.MandatoryUpdate = firstEntry.Value;

    return true;
}
// AHMED EDIT
/// <summary>
/// Reads the saved email from the registry, checks the internet connection and asks the
/// version controller for version data. Any failure leaves the version state UNKNOWN.
/// </summary>
private void GetSavedEmailAndDetectVersion()
{
    try
    {
        VersionController.GetInstance();

        string savedEmail = string.Empty;
        RegistryProcessor.GetFromRegistry(CommonConst.REGISTRY_PATH, CommonConst.EMAIL_VALUE_NAME, ref savedEmail, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);

        bool isOnline = WebProcessor.CheckInternetConnection();
        if (isOnline)
        {
            IsInternetExists = true;
        }
        else
        {
            LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetNetworkLine("Connection Error"));
            MessageBox.Show("Connection Error");

            CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
            IsInternetExists = false;
            return;
        }

        // A missing (null or empty) email is always sent as an empty string.
        string emailToSend = string.IsNullOrEmpty(savedEmail) ? string.Empty : savedEmail;
        CurrentContext.GetInstance().VersionData.VersionDetected = VersionController.GetInstance().GetVersionData(emailToSend);

        if (!CurrentContext.GetInstance().VersionData.VersionDetected)
        {
            CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
            return;
        }

        _new_version_lb.TextAlign = ContentAlignment.MiddleCenter;
        DetectVersionState();
    }
    catch
    {
        CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
    }
}
/// <summary>
/// Timer tick: refreshes both local-time labels and, while no internet connection is known,
/// re-checks the connection every CommonConst.CHECK_CONNECTION_INTERVAL ticks. When the
/// connection comes back, version detection is run and the version UI is refreshed.
/// </summary>
private void _current_timer_tm_Tick(object sender, EventArgs e)
{
    _local_time_al_lb.Text = string.Format("Local Time: {0:hh:mm tt}", DateTime.Now);
    _local_time_lb.Text = string.Format("Local Time: {0:hh:mm tt}", DateTime.Now);

    if (IsInternetExists)
    {
        return;
    }

    InternetConnectionCount++;
    if (InternetConnectionCount < CommonConst.CHECK_CONNECTION_INTERVAL)
    {
        return;
    }

    InternetConnectionCount = 0;

    if (!WebProcessor.CheckInternetConnection())
    {
        IsInternetExists = false;
        return;
    }

    IsInternetExists = true;

    try
    {
        CurrentContext.GetInstance().VersionData.VersionDetected = VersionController.GetInstance().GetVersionData(string.Empty);

        bool versionDetected = CurrentContext.GetInstance().VersionData.VersionDetected;
        if (!versionDetected)
        {
            CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
        }
        else
        {
            _new_version_lb.TextAlign = ContentAlignment.MiddleCenter;
            DetectVersionState();
        }

        SetVersionButton();
        ConfVersionPanel();
    }
    catch
    {
        CurrentContext.GetInstance().VersionData.State = VersionState.UNKNOWN;
    }
}
/// <summary>
/// Sends a login request for the given credentials, logs the server reply and, on success,
/// stores the credentials in the current context's login data.
/// </summary>
/// <param name="login">The login name</param>
/// <param name="password">The password</param>
/// <returns>The login state derived from the server response.</returns>
public LoginState Login(string login, string password)
{
    if (_ServerApiProvider == null)
    {
        _ServerApiProvider = ServerApiFactory.CreateServerApiProvider();
    }

    // Start every attempt from fresh login data.
    CurrentContext.GetInstance().LoginData = LoginDataFactory.CreateLoginData();

    string loginUrl = _ServerApiProvider.CreateLoginUrl(login, password);
    string serverReply = WebProcessor.MakeGetRequest(loginUrl);

    string replyMessage = string.Format("server reply \"{0}\"", serverReply);
    LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetNetworkLine(replyMessage));

    LoginState state = _ServerApiProvider.AnalizeLoginSuccess(serverReply);
    if (state != LoginState.LOGGED)
    {
        return state;
    }

    CurrentContext.GetInstance().LoginData.Login = login;
    CurrentContext.GetInstance().LoginData.Password = password;

    return state;
}
/// <summary>
/// Timer callback that re-checks the internet connection. On success the timer stays
/// stopped and the handler is notified; on failure the timer is restarted with the
/// interval from _Times at index _Count, or the last interval once _Count runs past the end.
/// </summary>
private void OnInternalTimedEvent(object source, ElapsedEventArgs e)
{
    _Count++;

    bool isConnected = WebProcessor.CheckInternetConnection();

    string logText = isConnected ? "Exp. Internet Connection OK" : "Exp. Internet Connection Fail";
    LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetNetworkLine(logText));

    // The timer is stopped in both outcomes; only the failure path starts it again.
    _InternalTimer.Stop();

    if (isConnected)
    {
        _Handler.ConnectionStateChanged(true);
        _IsStarted = false;
        _Handler.CheckConnectionEventFired();
        return;
    }

    int intervalIndex = _Count < _Times.Length ? _Count : _Times.Length - 1;
    _InternalTimer.Interval = _Times[intervalIndex];
    _InternalTimer.Start();
}
/// <summary>
/// Resolving pathRelativeToPage against domainAndPage must yield pathFullDomainWithDir.
/// </summary>
public void GetUrlForObject_domainAndPage_pathRelativeToPage()
{
    string resolvedUrl = WebProcessor.GetUrlForObject(domainAndPage, pathRelativeToPage);

    Assert.AreEqual(pathFullDomainWithDir, resolvedUrl);
}
/// <summary>
/// Login button handler. Validates that both fields are filled, then runs the login on a
/// thread-pool thread while the session-data lock is held. On success the credentials are
/// saved to the registry and the main form replaces this form; on failure the reason is
/// logged and shown to the user.
/// </summary>
private void _login_btn_Click(object sender, EventArgs e)
{
    // Snapshot the credentials on the UI thread so the worker never touches the text boxes.
    string username = _username_Tb.Text;
    string password = _password_Tb.Text;

    if (string.IsNullOrEmpty(username) || string.IsNullOrEmpty(password))
    {
        LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetLoginFailedLine("Incorrect username or password"));
        MessageBox.Show("Login or password is empty");
        return;
    }

    if (CurrentContext.GetInstance().IsSessionDataLocked)
    {
        return;
    }

    // Take the lock here, before queueing, so a second click cannot start a parallel login.
    CurrentContext.GetInstance().IsSessionDataLocked = true;

    ThreadPool.QueueUserWorkItem(new WaitCallback((s) =>
    {
        try
        {
            if (!WebProcessor.CheckInternetConnection())
            {
                LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetLoginFailedLine("Network unavailable"));
                this.Invoke((MethodInvoker) delegate() { MessageBox.Show("Network unavailable"); });
                return;
            }

            LoginState loginState = LoginDataController.GetInstance().Login(username, password);

            if (loginState == LoginState.LOGGED)
            {
                this.Invoke((MethodInvoker) delegate()
                {
                    LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetLoginSuccessLine());

                    RegistryProcessor.SetToRegistry(CommonConst.REGISTRY_PATH, CommonConst.EMAIL_VALUE_NAME, CurrentContext.GetInstance().LoginData.Login, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);

                    // AHMED EDITS
                    string pwd = CurrentContext.GetInstance().LoginData.Password;
                    pwd = StringCipher.Encrypt(pwd, CommonConst.DES_KEY);

                    if (!string.IsNullOrEmpty(pwd))
                    {
                        RegistryProcessor.SetToRegistry(CommonConst.REGISTRY_PATH, CommonConst.PWD_VALUE_NAME, pwd, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);
                    }
                    // AHMED EDITS

                    // Show the main form hidden first, then place it over this form before revealing it.
                    MainForm mainForm = new MainForm();
                    mainForm.Show();
                    mainForm.Visible = false;
                    mainForm.Left = this.Left;
                    mainForm.Top = this.Top;
                    mainForm.Size = this.Size;
                    mainForm.Visible = true;

                    IsHide = true;
                    _balloon_ni.Visible = false;
                    this.Hide();

                    LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetNavigationLine("Main page"));
                    LogController.GetInstance().RemoveLogHandler(this);
                });
            }
            else
            {
                this.Invoke((MethodInvoker) delegate()
                {
                    if (loginState == LoginState.CONNECTION_FAIL)
                    {
                        LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetLoginFailedLine("Web server did not responded"));
                        MessageBox.Show("Connection Error");
                    }
                    else if (loginState == LoginState.LOGIN_FAIL)
                    {
                        LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetLoginFailedLine("Incorrect username or password"));
                        MessageBox.Show("Login Error");
                    }
                });
            }
        }
        finally
        {
            // Always release the lock, even if the login attempt throws.
            CurrentContext.GetInstance().IsSessionDataLocked = false;
        }
    }));
}
/// <summary>
/// Uploads a bug report together with the activity log file. The sender email is taken from
/// the current login data, then from the registry, and otherwise a placeholder address is used.
/// </summary>
/// <param name="report">The report text, sent as "body"</param>
/// <param name="isCrashReport">When true, "crash_report" = "1" is added to the post parameters</param>
/// <returns>"Internet Connection Error" when the connection check fails, otherwise the server response.</returns>
public string SendBugReport(string report, bool isCrashReport)
{
    if (!WebProcessor.CheckInternetConnection())
    {
        return "Internet Connection Error";
    }

    string url = SessionController.GetInstance().ServerApiProvider.CreateBugReportUrl();

    string email = "*****@*****.**";
    bool hasLoggedInEmail = CurrentContext.GetInstance().LoginData != null
                            && !string.IsNullOrEmpty(CurrentContext.GetInstance().LoginData.Login);
    if (hasLoggedInEmail)
    {
        email = CurrentContext.GetInstance().LoginData.Login;
    }
    else
    {
        string regEmail = string.Empty;
        RegistryProcessor.GetFromRegistry(CommonConst.REGISTRY_PATH, CommonConst.EMAIL_VALUE_NAME, ref regEmail, RegistryProcessor.RegistryParts.HKEY_CURRENT_USER);

        if (!string.IsNullOrEmpty(regEmail))
        {
            email = regEmail;
        }
    }

    string currVersion = "1.00";
    if (CurrentContext.GetInstance().VersionData != null)
    {
        currVersion = CurrentContext.GetInstance().VersionData.CurrentVersion.ToString();
    }

    string token = CommonConst.BUG_REPORT_TOKEN;
    string logFile = LogController.GetInstance().IsLogFileExists();

    NameValueCollection postParams = new NameValueCollection
    {
        { "from", email },
        { "body", report },
        { "tracker_version", currVersion },
        { "token", token }
    };
    if (isCrashReport)
    {
        postParams.Add("crash_report", "1");
    }

    List<string> logFilePath = new List<string> { logFile };

    string resp = WebProcessor.UploadFileWithParams(
        url,
        SessionController.GetInstance().ServerApiProvider.PrepareFilesData(logFilePath, "activity_log", "text/plain"),
        postParams);

    LogController.GetInstance().LogData(LogController.GetInstance().LogFormat.GetNavigationLine("Server reply: " + resp));

    return resp;
}
/// <summary>
/// Resolving pathRelativeToPage against domainEndingSlash must yield pathFullDomain.
/// </summary>
public void GetUrlForObject_domainEndingSlash_pathRelativeToPage()
{
    string resolvedUrl = WebProcessor.GetUrlForObject(domainEndingSlash, pathRelativeToPage);

    Assert.AreEqual(pathFullDomain, resolvedUrl);
}
/// <summary>
/// Resolving pathFullDomain against domainAndDirNoEndingSlash must yield pathFullDomain.
/// </summary>
public void GetUrlForObject_domainAndDirNoEndingSlash_pathFullDomain()
{
    string resolvedUrl = WebProcessor.GetUrlForObject(domainAndDirNoEndingSlash, pathFullDomain);

    Assert.AreEqual(pathFullDomain, resolvedUrl);
}
/// <summary>
/// Grab a potential websoundsearch object at a url
/// </summary>
/// <param name="url">The url of the object</param>
/// <param name="header">Additional header to include, if any</param>
/// <param name="userAgent">The user agent to use, if any</param>
/// <returns>
/// A websearchsound object, with the properties populated if it's really a sound. When the
/// request fails or the object is too big, only an empty content type is set. Errors are
/// logged, not thrown.
/// </returns>
private static websearchsound GetWebObjectAtUrl(string url, string header, string userAgent)
{
    const long MAX_SOUND_SIZE_BYTES = 1024 * 1000;  // Let's cap at 1 MB

    websearchsound result = new websearchsound();

    // Never hand back a null content type: callers inspect it even when nothing was downloaded.
    result.contenttype = string.Empty;

    HttpWebResponse response = null;

    try
    {
        // Created inside the try so that malformed or non-HTTP links found on a page are
        // logged below instead of throwing out of this method.
        var request = (HttpWebRequest)HttpWebRequest.Create(url);
        request.Timeout = 5000; // If it takes longer than 5 seconds to respond, we're in trouble. Let's bail

        if (!Functions.IsEmptyString(header))
        {
            request.Headers.Add(header);
        }

        if (!Functions.IsEmptyString(userAgent))
        {
            request.UserAgent = userAgent;
        }

        response = (HttpWebResponse)request.GetResponse();

        // MIME types are protocol tokens, so lower-case them culture-independently.
        string foundContentType = response.ContentType.ToLowerInvariant();

        // NOTE(review): ContentLength is -1 when the server sends no length, in which case
        // this cap is not applied - confirm whether such responses should be size-checked too.
        long responseSize = response.ContentLength;

        if (responseSize > MAX_SOUND_SIZE_BYTES)
        {
            LogMessage(string.Format("Won't download, too big: {0} (limit is {1})", responseSize, MAX_SOUND_SIZE_BYTES));
        }
        else
        {
            result.contenttype = foundContentType;

            string outputExt = GetExtensionFromMimeType(foundContentType);
            string fileName = WebProcessor.GetFileNameFromUrl(url);

            result.issound = outputExt != string.Empty;
            result.filename = fileName;
            result.extension = outputExt;

            if (result.issound)
            {
                // Pull the raw bytes into memory.
                using (var memStream = new MemoryStream())
                {
                    try
                    {
                        Functions.CopyStream(response.GetResponseStream(), memStream);

                        result.soundbytes = memStream.ToArray();
                        result.size = memStream.Length;
                    }
                    catch (Exception ex)
                    {
                        LogMessage(string.Format("Exception getting sound bytes for file \"{0}\", was: {1}", result.filename, ex.Message));
                    }
                }

                //
                // Don't clog log up with successes, we're worried about the errors
                //
            }
            else
            {
                LogMessage(string.Format("Object at \"{0}\" not a sound, has mime type of \"{1}\"", url, foundContentType));
            }
        }
    }
    catch (Exception ex)
    {
        // Crud.
        LogMessage(string.Format("Error doing stuff with file \"{0}\", was: {1}", url, ex.Message));
    }
    finally
    {
        if (response != null)
        {
            response.Close();
        }
    }

    return result;
}
/// <summary> /// Get all the sounds in the list of urls from the passed IDataSource and add the sounds to the passed websearch /// </summary> /// <param name="urls">The list of urls</param> /// <param name="dataSource">The datasource to use to look for sounds</param> /// <param name="currentSearch">The current search</param> private static void GetSoundsOnPages(IList <dynamic> urls, IDataSource dataSource, websearch currentSearch, IList <websearchsound> searchResultList, Functions.LogMessageDelegate LogMessage, int maxDepthToFollow) { //const int MAX_URLS_TO_SEARCH = 20; const int MAX_SOUNDS_PER_URL = 150; //int urlsProcessed = 0; HashSet <string> urlsOfObjectsSearched = new HashSet <string>(); // // Multithreading here for requesting the pages works pretty well speed-wise. Unfortunately, the regexes bog down the // server so badly that it becomes unresponsive for other users. So, don't do parallel on this outside loop. // // However, once the first page is processed, the sounds are webrequested asynchronously. So, the next page will // start being processed while the first page's sounds are still being downloaded. This works quite well, and // the performance is just about the same. So, let's stick with that. 
// foreach (dynamic url in urls) { string theUrl = url.Url; string domain = WebProcessor.GetDomainOfUrl(theUrl); if (unprocessableDomains.Contains(domain)) { LogMessage(string.Format("Skipping crappy domain: {0}", domain)); } else { LogMessage(string.Format("About to search for sounds on page: \"{0}\"", theUrl)); // string pageContent = WebProcessor.GetUrlContents(theUrl, null, null, LogMessage); // // todo: test this, make sure it works // string pageContent = dataSource.GetUrlContents(theUrl, null, GetUserAgent(), LogMessage); bool wasAborted = false; // // todo: combine sound links func with above function // IList <string> linksOnPage = GetSoundLinksOnPage(pageContent, ref wasAborted); // // For generating test case files, set breakpoint on if (wasAborted) below with condition: // // maxDepthToFollow == 1 // if (wasAborted) { LogMessage(string.Format("Had to abort link search on domain: {0}", domain)); lock (unprocessableDomains) { unprocessableDomains.Add(domain); } } LogMessage(string.Format("Found {0} links on \"{1}\"", linksOnPage.Count, theUrl)); #if MULTITHREADED Parallel.ForEach <string>(linksOnPage.Take(MAX_SOUNDS_PER_URL), partialLink => // <=-- normal operation - multithreaded #else foreach (string partialLink in linksOnPage.Take(MAX_SOUNDS_PER_URL)) // <=-- for debugging stuff, it's easier when not multithreaded #endif { string soundLink = WebProcessor.GetUrlForObject(theUrl, partialLink); LogMessage(string.Format("About to grab a potential sound here: \"{0}\"", soundLink)); if (!unprocessableDomains.Contains(domain) && IsNewSoundToGrab(urlsOfObjectsSearched, soundLink)) { websearchsound receivedObject = GetWebObjectAtUrl(soundLink, null, null); // // enhanced search: if not a sound and is text/html and response code is 200, search for sounds on THAT page // if (receivedObject.issound) { receivedObject.sourceurl = theUrl; receivedObject.sourceDomain = domain; receivedObject.searchResultOrder = url.Index; // // Check for dups // string md5Hash = 
Functions.GetMd5Hash(receivedObject.soundbytes); if (!HaveMd5ForSound(dataSource.CurrentSoundMd5s, md5Hash)) { dataSource.SetSoundInSearch(currentSearch, receivedObject); // // Performance optimization: we're not going to return the sound data itself with the search // so let's free up the mem here // receivedObject.soundbytes = null; searchResultList.Add(receivedObject); } else { LogMessage("Not adding sound - already in collection"); } } else if (receivedObject.contenttype.ToLower().StartsWith("text/html")) { // // We have another HTML page. Check that too? // if (maxDepthToFollow > 0) { LogMessage(string.Format("Going to drill down in this page - we're at max level: {0}", maxDepthToFollow)); GetSoundsOnPages(new List <dynamic>() { new { Url = soundLink, Index = url.Index } }, dataSource, currentSearch, searchResultList, LogMessage, maxDepthToFollow - 1); } else { LogMessage(string.Format("No more drilling down, we're as low as we can go")); } } } else { LogMessage("Won't process: already had sound from that url, or the domain is unprocessable!"); } #if MULTITHREADED });
/// <summary>
/// Resolving externalLink against domainNoEndingSlash must yield externalLink.
/// </summary>
public void GetUrlForObject_domainNoEndingSlash_externalLink()
{
    string resolvedUrl = WebProcessor.GetUrlForObject(domainNoEndingSlash, externalLink);

    Assert.AreEqual(externalLink, resolvedUrl);
}
/// <summary>
/// Runs the crawl loop: dequeues webpage urls and, for each valid and not yet visited url
/// that the crawler accepts, builds a WebPage, queues the newly found urls, sends the page
/// to the hub, updates the shared search context and enqueues the display views. The loop
/// ends when the page limit is reached, the worker is no longer callable, or the queue is empty.
/// </summary>
private void explore()
{
    WebUtils webUtils = new WebUtils();
    WebCache urlCache = new WebCache();
    WebCrawler pageCrawler = new WebCrawler();
    WebHostPolicy hostPolicy = new WebHostPolicy();
    WebProcessor pageProcessor = new WebProcessor(configuredSettings);
    ThreadSleeper pause = new ThreadSleeper(5000);

    // Seed the queue when nothing has been queued yet.
    if (sizeOfQueue() < 1)
    {
        initQueue(urlCache, currentUrl);
    }

    // Keep going while under the page limit, still callable, and there are urls left.
    while (amountOfWebpageUrlsTraversed() < maxPageSearchLimit && callable && !isQueueEmpty())
    {
        string pageUrl = dequeueWebpageUrl(traversalStyle, urlCache);

        // Skip urls that are invalid...
        if (!webUtils.isValidWebpageURL(pageUrl))
        {
            continue;
        }

        // ...or that were already visited.
        if (hasWebpageUrlBeenVisied(pageUrl))
        {
            continue;
        }

        // Try to timeout, checking shared state and the current thread.
        handlePotentialTimeout(hostPolicy, webUtils, pageUrl);

        // A failed crawl means the url is unsupported.
        bool crawled = pageCrawler.tryCrawl(pageUrl);
        if (!crawled)
        {
            continue;
        }

        setWebpageUrlAsVisited(pageUrl);

        // Take everything the crawler collected for this page.
        Queue<string> pageTexts = pageCrawler.releaseTexts();
        Queue<string> linkedPageUrls = pageCrawler.releaseWebpages();
        Queue<string> linkedImageUrls = pageCrawler.releaseImages();
        string pageHost = pageCrawler.releaseHost();

        // Build the page from the collected data, then run the basic filter over the texts.
        WebPage page = pageProcessor.constructWebsite(pageTexts, linkedPageUrls, linkedImageUrls, pageUrl, pageHost);
        pageProcessor.tryBasicFilter(pageTexts);

        // Hand the newly discovered urls to the queue via the cache.
        addWebpageUrlsToQueue(urlCache, page, linkedPageUrls, linkedImageUrls);

        // Enqueue the page to the hub.
        sendToHub(page);

        // Update the shared state object.
        sharedSearchContext.getContextInfo().addToThreadScore(contextualId, page.getSearchPhraseCount());
        sharedSearchContext.getContextInfo().incrementUrlsTraversed();

        // Primary, then secondary display for the end user.
        mainDisplayQueue.Enqueue(webUtils.createPrimaryDisplayView(page, contextualId));
        secondaryDisplayQueue.Enqueue(webUtils.createSecondaryDisplayView(sharedSearchContext));

        // Try to set the host for timeout on all threads.
        addOrUpdatePolicy(hostPolicy, pageHost);

        pause.trySleeping();
    }

    // One last secondary view once the loop has finished.
    secondaryDisplayQueue.Enqueue(webUtils.createSecondaryDisplayView(sharedSearchContext));
}
/// <summary>
/// Resolving pathFullDomain against domainAndPage must yield pathFullDomain.
/// </summary>
public void GetUrlForObject_domainAndPage_pathFullDomain()
{
    string resolvedUrl = WebProcessor.GetUrlForObject(domainAndPage, pathFullDomain);

    Assert.AreEqual(pathFullDomain, resolvedUrl);
}