Exemple #1
0
        //______________________________________________________________________________________________________________________________

        /// <summary>
        /// Crawls through the site whose URL is obtained from the field, using the configured level of depth.
        /// </summary>
        /// <exception cref="ArgumentNullException">Thrown if the website URL has not been obtained yet.</exception>

        public void crawlThroughSite()
        {
            string mainSiteURL = this.getSiteURL();

            // A null URL is as unusable as an empty one. Comparing against string.Empty alone let null
            // slip past this guard and be handed on to the path and crawling helpers below.
            if (string.IsNullOrEmpty(mainSiteURL))
            {
                // Name the offending variable instead of reporting its runtime type name ("String").
                throw new ArgumentNullException(nameof(mainSiteURL), "The website URL is empty.");
            }

            // The target directory for the main site lives under the root directory created by FileSystemFlow.
            string rootDirectoryName     = FileSystemFlow.createRootDirectory();
            string mainSiteDirectoryName = this.determineQualifiedPath(rootDirectoryName, mainSiteURL);

            // Basic step on the main page: yields the absolute links that seed the level-based crawl.
            ISet<string> absoluteLinks0 = this.performBasicCrawlingStep(mainSiteURL, mainSiteDirectoryName);

            // NOTE(review): the literal 1 is presumably the starting depth level - confirm against performLevelCrawlingStep.
            this.performLevelCrawlingStep(absoluteLinks0, mainSiteDirectoryName, 1);
        }
Exemple #2
0
        //______________________________________________________________________________________________________________________________

        /// <summary>
        /// Performs a basic step of the crawling procedure.
        /// (1) Connects to the website using an absolute URL.
        /// (2) Retrieves the set of absolute links found in the website content behind the given URL.
        /// (3) Saves the website content to the specified directory and with the specified file name.
        /// </summary>
        /// <param name="url">An absolute URL to proceed for.</param>
        /// <param name="directoryName">A target directory full path for saving the website content.</param>
        /// <returns>A set of absolute links retrieved from the website content of the given URL.</returns>

        private ISet <string> performBasicCrawlingStep(string url, string directoryName)
        {
            // Fetch the page body through whichever download mode is currently switched on.
            string pageBody = (this.getAsynchronousDownloadUse() == true)
                ? this.downloadWebsiteContentAsynchronously(url)
                : this.downloadWebsiteContent(url);

            // Only a non-empty body is persisted; an empty one skips the file system entirely.
            bool hasContent = !(pageBody == string.Empty);

            if (hasContent)
            {
                FileSystemFlow.createDirectory(directoryName);

                // The file is named after the URL plus the crawled-website extension, then sanitised into a usable path.
                string targetFileName = url + "." + CRAWLED_WEBSITES_FILE_EXTENSION;
                string targetFilePath = this.determineQualifiedPath(directoryName, targetFileName);

                FileSystemFlow.saveTextToFile(targetFilePath, pageBody);
            }

            // Link extraction runs on whatever was downloaded, saved or not.
            return this.extractAbsoluteLinksFrom(pageBody);
        }
Exemple #3
0
        //______________________________________________________________________________________________________________________________

        /// <summary>
        /// Determines a qualified path.
        /// Qualified means free of characters reserved by the Windows file system and of an acceptable length.
        /// If a combined path would be too long, the website URL name will be trimmed from the left side.
        /// </summary>
        /// <param name="directoryName">A directory name of the target destination location.</param>
        /// <param name="websiteURL">An untrusted name.</param>
        /// <returns>The Windows operating system qualified directory path.</returns>

        private string determineQualifiedPath(string directoryName, string websiteURL)
        {
            string qualifiedLengthPath   = string.Empty;
            string mainSiteDirectoryName = string.Empty;

            try
            {
                // Keep only what follows the first '.'; when there is no '.', IndexOf yields -1 and the whole URL is kept.
                string websiteName = FileSystemFlow.removeWindowsFileSystemReservedCharacters(websiteURL.Substring(websiteURL.IndexOf('.') + 1));
                websiteName           = FileSystemFlow.limitCharactersToFirst(FileSystemFlow.WINDOWS_QUALIFIED_FILENAME_LENGTH - 1, websiteName);
                mainSiteDirectoryName = System.IO.Path.Combine(directoryName, websiteName);

                if (mainSiteDirectoryName.Length > (FileSystemFlow.WINDOWS_QUALIFIED_DIRECTORY_LENGTH - 1))
                {
                    // Cut the surplus (plus one character) off the left of the name so that the combined
                    // path falls below WINDOWS_QUALIFIED_DIRECTORY_LENGTH.
                    int trimmingSize = mainSiteDirectoryName.Length - FileSystemFlow.WINDOWS_QUALIFIED_DIRECTORY_LENGTH;
                    websiteName           = websiteName.Substring(trimmingSize + 1);
                    mainSiteDirectoryName = System.IO.Path.Combine(directoryName, websiteName);
                }

                qualifiedLengthPath = mainSiteDirectoryName;
            }
            catch (Exception x)
            {
                // One handler replaces four identical ones (ArgumentNullException, ArgumentOutOfRangeException,
                // ArgumentException, Exception): each recorded the same fields and wrote the same log line.
                // The failure is recorded and logged, not rethrown; the caller receives an empty path.
                this.lastExceptionInfo.typeName   = x.GetType().ToString();
                this.lastExceptionInfo.methodName = nameof(determineQualifiedPath);
                this.lastExceptionInfo.argName    = mainSiteDirectoryName.GetType().FullName + "~" + nameof(mainSiteDirectoryName);
                this.lastExceptionInfo.argValue   = mainSiteDirectoryName.ToString();
                this.lastExceptionInfo.message    = x.Message;
                this.lastExceptionInfo.id         = "[SC-5]";

                string args = lastExceptionInfo.argName + "=" + lastExceptionInfo.argValue;
                StdErrFlow.writeLine(lastExceptionInfo.id + " " + x.ToString() + " (" + lastExceptionInfo.methodName + ") " + args + Environment.NewLine);
            }

            return qualifiedLengthPath;
        }
Exemple #4
0
        //______________________________________________________________________________________________________________________________

        /// <summary>
        /// It tries to redirect standard error stream into a file whose name is defined in a constant STDERR_FILENAME.
        /// </summary>
        /// <returns>'true' if the function code has been traversed, 'false' when any error occurred.</returns>

        public static bool tryToRedirectStdErr()
        {
            try
            {
                // Remember the current error stream before swapping in the file-backed writer.
                oldStdErrStream = Console.Error;

                // 'new' never yields null (a failed construction throws), so no null check follows.
                // The second argument 'false' makes the writer overwrite the file rather than append to it.
                newStdErrStream = new StreamWriter(STDERR_FILENAME, false);
                Console.SetError(newStdErrStream);

                isLastRedirectionSuccessfull = true;

                // Reduce the application's full path to the part after the last backslash.
                string appName = FileSystemFlow.getApplicationFullPath();
                appName = appName.Substring(appName.LastIndexOf('\\') + 1);

                // Log header.
                writeLine(Environment.NewLine);
                writeLine("=============================================");
                writeLine("Error log for: " + appName);
                writeLine("Timestamp: " + DateTime.Now.ToString());
                writeLine("=============================================");

                return true;
            }
            catch (Exception x)
            {
                // One handler replaces ten identical ones; the concrete exception type is still
                // recorded through x.GetType().
                //
                // newStdErrStream may still be null here when the StreamWriter constructor itself
                // failed. Dereferencing it unconditionally would raise a NullReferenceException from
                // inside the handler and escape this method instead of returning false.
                lastExceptionInfo.typeName   = x.GetType().ToString();
                lastExceptionInfo.methodName = nameof(tryToRedirectStdErr);
                lastExceptionInfo.argName    = (newStdErrStream?.GetType().FullName ?? typeof(StreamWriter).FullName) + "~" + nameof(newStdErrStream);
                lastExceptionInfo.argValue   = newStdErrStream?.ToString() ?? "null";
                lastExceptionInfo.message    = x.Message;
                lastExceptionInfo.id         = "[SEF-2]";

                writeLine(lastExceptionInfo.id + " " + x.ToString() + " (" + lastExceptionInfo.methodName + ")");
                writeLine(Environment.NewLine);

                isLastRedirectionSuccessfull = false;
                return false;
            }
        }