Beispiel #1
0
        /// <summary>
        /// Creates an extractor from the configuration file previously saved under
        /// <paramref name="storageDirForWebsite"/>, optionally overriding some of the
        /// reloaded parameters, then runs <c>Init()</c>.
        /// </summary>
        /// <param name="storageDirForWebsite">
        /// Directory expected to contain the <c>LogsDirName</c> sub-directory holding the
        /// <c>ConfigFileName</c> parameters file.
        /// </param>
        /// <param name="newParams">
        /// Optional parameter strings, each one handed to <c>ExtractorParams.ParseParam</c>
        /// after the file has been reloaded; may be <c>null</c> when nothing is overridden.
        /// </param>
        /// <param name="doContinue">
        /// Value stored in <c>DoContinue</c> (presumably requests that a previous run be
        /// continued rather than restarted; confirm against the callers).
        /// </param>
        /// <exception cref="FileNotFoundException">
        /// The configuration file does not exist at the expected location.
        /// </exception>
        public WebsiteTextExtractor(string storageDirForWebsite, string[] newParams, bool doContinue = false)
        {
            // Save action requested
            DoContinue = doContinue;

            // Reload params file
            FileInfo paramFileInfo = new FileInfo(Path.Combine(storageDirForWebsite, LogsDirName, ConfigFileName));

            if (!paramFileInfo.Exists)
            {
                // Specific exception type (still caught by any existing catch (Exception) handler);
                // the message text is unchanged and the offending path is exposed through FileName.
                throw new FileNotFoundException("No configuration file found at : " + paramFileInfo.FullName, paramFileInfo.FullName);
            }

            using (StreamReader sr = new StreamReader(paramFileInfo.FullName))
            {
                ExtractorParams = WebsiteExtractorParams.ReadFromFile(sr);
            }

            // Override with new params
            if (newParams != null)
            {
                foreach (string keyValueParam in newParams)
                {
                    ExtractorParams.ParseParam(keyValueParam);
                }
            }

            Init();
        }
Beispiel #2
0
        /// <summary>
        /// Registers the console cancel handler, makes sure the logs directory exists,
        /// saves the current extractor parameters and opens the requests, messages and
        /// exceptions log writers.
        /// </summary>
        /// <remarks>
        /// Every file is opened with <c>DoContinue</c> as the append flag: when it is
        /// <c>true</c> new content is added after the existing content, otherwise the
        /// files are overwritten.
        /// </remarks>
        private void InitLogFiles()
        {
            // Intercept the console cancel key: suppress the default process termination
            // and only record that the user asked to stop.
            Console.CancelKeyPress += (sender, args) =>
            {
                args.Cancel = true;
                userCancelEventReceived = true;
            };

            var logsDirectory = new DirectoryInfo(Path.Combine(ContentDirectory.FullName, LogsDirName));
            if (!logsDirectory.Exists)
            {
                logsDirectory.Create();
            }
            string logsPath = logsDirectory.FullName;

            // Parameters file: when appending, a blank line separates the new
            // parameters from the ones already stored in the file.
            string configPath = Path.Combine(logsPath, ConfigFileName);
            using (var configWriter = new StreamWriter(configPath, DoContinue))
            {
                if (DoContinue)
                {
                    configWriter.WriteLine();
                }
                ExtractorParams.WriteToFile(configWriter);
            }

            // Requests log: semicolon-separated header row.
            // Column titles are runtime text and are reproduced exactly as they were.
            string[] columnTitles =
            {
                "Clock",
                "Url",
                "Status code",
                "Reponse time (ms)",
                "Download time (ms)",
                "Content size (bytes)",
                "Unique text blocks (%)",
                "Crawl depth",
                "Parent Url",
                "Redirected from",
                "Retry count",
                "Retry after (s)",
                "Error message"
            };
            requestsWriter = new StreamWriter(Path.Combine(logsPath, RequestsLogFileName), DoContinue);
            requestsWriter.WriteLine(string.Join(";", columnTitles));

            messagesWriter = new StreamWriter(Path.Combine(logsPath, MessagesLogFileName), DoContinue);

            // The exceptions writer is also handed to the logging layer.
            exceptionsWriter = new StreamWriter(Path.Combine(logsPath, ExceptionsLogFileName), DoContinue);
            log4net.LogManager.SetTextWriter(exceptionsWriter);
        }