//______________________________________________________________________________________________________________________________
/// <summary>
/// Crawls the site whose URL is returned by <c>getSiteURL()</c>: the main page is processed first,
/// then the links found on it are handed to <c>performLevelCrawlingStep</c>.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown if the website URL is null or empty, i.e. it has not been obtained yet.</exception>
public void crawlThroughSite()
{
    string mainSiteURL = this.getSiteURL();

    // Reject null as well as "": a null URL used to pass the `== string.Empty` test and
    // was then handed on to the path and download helpers.
    if (string.IsNullOrEmpty(mainSiteURL))
    {
        // typeof(string).Name keeps the ParamName value ("String") that the previous
        // mainSiteURL.GetType().Name produced, without dereferencing a possibly-null URL.
        throw new ArgumentNullException(typeof(string).Name, "The website URL is empty.");
    }

    string mainSiteDirectoryName = this.determineQualifiedPath(FileSystemFlow.createRootDirectory(), mainSiteURL);

    // Level 0: download and store the main page, collecting the absolute links found on it.
    ISet<string> absoluteLinks0 = this.performBasicCrawlingStep(mainSiteURL, mainSiteDirectoryName);

    // NOTE(review): the literal 1 is presumably the starting depth level - confirm against performLevelCrawlingStep.
    this.performLevelCrawlingStep(absoluteLinks0, mainSiteDirectoryName, 1);
}
//______________________________________________________________________________________________________________________________
/// <summary>
/// Performs the basic step of the crawling procedure:
/// (1) downloads the content behind the given absolute URL (asynchronously or not, depending on <c>getAsynchronousDownloadUse()</c>),
/// (2) if the content is not an empty string, creates the target directory and saves the content there,
/// (3) extracts the absolute links from the downloaded content.
/// </summary>
/// <param name="url">The absolute URL to process.</param>
/// <param name="directoryName">Full path of the directory in which the website content is saved.</param>
/// <returns>The set of absolute links extracted from the content downloaded from <paramref name="url"/>.</returns>
private ISet<string> performBasicCrawlingStep(string url, string directoryName)
{
    string pageContent = this.getAsynchronousDownloadUse()
        ? this.downloadWebsiteContentAsynchronously(url)
        : this.downloadWebsiteContent(url);

    if (pageContent != string.Empty)
    {
        FileSystemFlow.createDirectory(directoryName);

        // The file is named after the URL plus the crawler's file extension;
        // determineQualifiedPath turns that into the path actually used for saving.
        string targetFileName = url + "." + CRAWLED_WEBSITES_FILE_EXTENSION;
        string targetFilePath = this.determineQualifiedPath(directoryName, targetFileName);
        FileSystemFlow.saveTextToFile(targetFilePath, pageContent);
    }

    // Links are extracted even when nothing was saved (empty content).
    return this.extractAbsoluteLinksFrom(pageContent);
}
//______________________________________________________________________________________________________________________________
/// <summary>
/// Determines a qualified path: <paramref name="directoryName"/> combined with a name derived from
/// <paramref name="websiteURL"/> (the part after its first '.', passed through
/// <c>FileSystemFlow.removeWindowsFileSystemReservedCharacters</c> and <c>FileSystemFlow.limitCharactersToFirst</c>).
/// If the combined path is longer than <c>FileSystemFlow.WINDOWS_QUALIFIED_DIRECTORY_LENGTH - 1</c> characters,
/// the derived name is trimmed from the left side until the path fits.
/// </summary>
/// <param name="directoryName">A directory name of the target destination location.</param>
/// <param name="websiteURL">An untrusted name (a website URL, possibly with a file extension appended).</param>
/// <returns>
/// The combined path, or an empty string if any exception occurred; in that case the details are
/// stored in <c>lastExceptionInfo</c> and written through <c>StdErrFlow.writeLine</c>.
/// </returns>
private string determineQualifiedPath(string directoryName, string websiteURL)
{
    string mainSiteDirectoryName = string.Empty;

    try
    {
        // Keep only what follows the first '.'; when there is no dot IndexOf returns -1,
        // so Substring(0) keeps the whole string.
        string websiteName = FileSystemFlow.removeWindowsFileSystemReservedCharacters(websiteURL.Substring(websiteURL.IndexOf('.') + 1));
        websiteName = FileSystemFlow.limitCharactersToFirst(FileSystemFlow.WINDOWS_QUALIFIED_FILENAME_LENGTH - 1, websiteName);
        mainSiteDirectoryName = System.IO.Path.Combine(directoryName, websiteName);

        if (mainSiteDirectoryName.Length > (FileSystemFlow.WINDOWS_QUALIFIED_DIRECTORY_LENGTH - 1))
        {
            // Drop (excess + 1) leading characters of the name so that the combined path ends up
            // WINDOWS_QUALIFIED_DIRECTORY_LENGTH - 1 characters long. If the name is shorter than that,
            // Substring throws ArgumentOutOfRangeException, which is handled below.
            int trimmingSize = mainSiteDirectoryName.Length - FileSystemFlow.WINDOWS_QUALIFIED_DIRECTORY_LENGTH;
            websiteName = websiteName.Substring(trimmingSize + 1);
            mainSiteDirectoryName = System.IO.Path.Combine(directoryName, websiteName);
        }

        return mainSiteDirectoryName;
    }
    catch (Exception x)
    {
        // One handler replaces the former ArgumentNullException / ArgumentOutOfRangeException /
        // ArgumentException / Exception handlers: their bodies were identical, so the recorded
        // information and the logged line are unchanged for every exception type.
        this.lastExceptionInfo.typeName = x.GetType().ToString();
        this.lastExceptionInfo.methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        this.lastExceptionInfo.argName = mainSiteDirectoryName.GetType().FullName + "~" + nameof(mainSiteDirectoryName);
        this.lastExceptionInfo.argValue = mainSiteDirectoryName.ToString();
        this.lastExceptionInfo.message = x.Message;
        this.lastExceptionInfo.id = "[SC-5]";

        string args = this.lastExceptionInfo.argName + "=" + this.lastExceptionInfo.argValue;
        StdErrFlow.writeLine(this.lastExceptionInfo.id + " " + x.ToString() + " (" + this.lastExceptionInfo.methodName + ") " + args + Environment.NewLine);
    }

    // Reached only after a handled exception.
    return string.Empty;
}
//______________________________________________________________________________________________________________________________
/// <summary>
/// Tries to redirect the standard error stream into the file whose name is defined in the constant STDERR_FILENAME
/// (the file is overwritten, not appended to) and then writes a log header through <c>writeLine</c>.
/// The previous <c>Console.Error</c> writer is remembered in <c>oldStdErrStream</c>.
/// </summary>
/// <returns>
/// 'true' if every step completed; 'false' if any exception occurred. In the failure case the details are
/// stored in <c>lastExceptionInfo</c>, reported through <c>writeLine</c>, and
/// <c>isLastRedirectionSuccessfull</c> is set to false.
/// </returns>
public static bool tryToRedirectStdErr()
{
    try
    {
        oldStdErrStream = Console.Error;
        newStdErrStream = new StreamWriter(STDERR_FILENAME, false);
        Console.SetError(newStdErrStream);

        // The former `newStdErrStream == null` check was removed: `new` either returns an
        // instance or throws, so that branch could never be taken.
        isLastRedirectionSuccessfull = true;

        // Keep only the part of the application path after the last backslash.
        string appName = FileSystemFlow.getApplicationFullPath();
        appName = appName.Substring(appName.LastIndexOf('\\') + 1);

        writeLine(Environment.NewLine);
        writeLine("=============================================");
        writeLine("Error log for: " + appName);
        writeLine("Timestamp: " + DateTime.Now.ToString());
        writeLine("=============================================");
    }
    catch (Exception x)
    {
        // One handler replaces ten identical ones (UnauthorizedAccessException, NotSupportedException,
        // ArgumentNullException, ArgumentOutOfRangeException, ArgumentException, DirectoryNotFoundException,
        // PathTooLongException, IOException, SecurityException, Exception).
        //
        // newStdErrStream is not assigned by this call when the StreamWriter constructor is what threw,
        // and may therefore still be null. It must not be dereferenced unconditionally, otherwise the
        // handler itself throws NullReferenceException instead of returning false.
        lastExceptionInfo.typeName = x.GetType().ToString();
        lastExceptionInfo.methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        lastExceptionInfo.argName = (newStdErrStream?.GetType().FullName ?? typeof(StreamWriter).FullName) + "~" + nameof(newStdErrStream);
        lastExceptionInfo.argValue = newStdErrStream?.ToString() ?? "null";
        lastExceptionInfo.message = x.Message;
        lastExceptionInfo.id = "[SEF-2]";

        writeLine(lastExceptionInfo.id + " " + x.ToString() + " (" + lastExceptionInfo.methodName + ")");
        writeLine(Environment.NewLine);

        isLastRedirectionSuccessfull = false;
        return false;
    }

    return true;
}