/// <summary>
/// Worker loop that drains <paramref name="queue"/> until cancellation is requested,
/// no work is left (queue empty and no sibling worker running), or the FTP server
/// reported that its maximum connection count was reached.
/// </summary>
/// <param name="queue">Shared work queue of directories still to be indexed.</param>
/// <param name="name">Display name of this worker, used for logging and bookkeeping.</param>
/// <param name="cancellationToken">Cooperative cancellation for the whole indexing run.</param>
private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, string name, CancellationToken cancellationToken)
{
    Logger.Debug($"Start [{name}]");

    bool maxConnections = false;

    do
    {
        Interlocked.Increment(ref RunningWebDirectoryThreads);

        if (queue.TryDequeue(out WebDirectory webDirectory))
        {
            try
            {
                // Publish which directory this worker is handling (read elsewhere for status reporting).
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo[name] = webDirectory;
                }

                if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                {
                    Session.ProcessedUrls.Add(webDirectory.Url);
                    webDirectory.StartTime = DateTimeOffset.UtcNow;

                    Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                    if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                    {
                        WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory, OpenDirectoryIndexerSettings.Username, OpenDirectoryIndexerSettings.Password);

                        if (webDirectory?.CancellationReason == Constants.Ftp_Max_Connections)
                        {
                            webDirectory.CancellationReason = null;
                            maxConnections = true;

                            if (webDirectory.Name == Constants.Root)
                            {
                                webDirectory.Error = true;
                                // BUGFIX: no Interlocked.Decrement here. The exception below is
                                // caught by the catch block of this same method, after which the
                                // decrement at the end of the loop body runs anyway — decrementing
                                // here as well pushed RunningWebDirectoryThreads out of sync and
                                // could make sibling workers exit prematurely.
                                throw new Exception("Error checking FTP because maximum connections reached");
                            }

                            // Requeue so another worker/connection can retry this directory.
                            Session.ProcessedUrls.Remove(webDirectory.Url);
                            queue.Enqueue(webDirectory);

                            try
                            {
                                // Give this connection back to the server; this worker shuts down (maxConnections).
                                await FtpParser.FtpClients[name].DisconnectAsync(cancellationToken);

                                lock (FtpParser.FtpClients)
                                {
                                    FtpParser.FtpClients.Remove(name);
                                }
                            }
                            catch (Exception exFtpDisconnect)
                            {
                                Logger.Error(exFtpDisconnect, "Error disconnecting FTP connection.");
                            }
                        }

                        if (parsedWebDirectory != null)
                        {
                            DirectoryParser.CheckParsedResults(parsedWebDirectory);
                            AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                        }
                    }
                    else if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
                    {
                        // The Google Drive indexer rewrites the URL; restore the original afterwards.
                        string baseUrl = webDirectory.Url;
                        WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);
                        parsedWebDirectory.Url = baseUrl;

                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else
                    {
                        if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain || SameHostAndDirectory(Session.Root.Uri, webDirectory.Uri))
                        {
                            Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                            Session.TotalHttpRequests++;

                            // Hard 5-minute cap per directory, independent of the outer cancellation token.
                            CancellationTokenSource cancellationTokenSource = new CancellationTokenSource();
                            cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(5));

                            Context pollyContext = new Context
                            {
                                { "Processor", name },
                                { "WebDirectory", webDirectory },
                                { "CancellationTokenSource", cancellationTokenSource }
                            };

                            await RetryPolicy.ExecuteAsync(async (context, token) =>
                            {
                                await ProcessWebDirectoryAsync(name, webDirectory, cancellationTokenSource.Token);
                            }, pollyContext, cancellationTokenSource.Token);
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");
                            Session.Skipped++;
                        }
                    }

                    Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                }
                else
                {
                    //Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                }
            }
            catch (Exception ex)
            {
                if (ex is TaskCanceledException)
                {
                    Session.Errors++;
                    webDirectory.Error = true;

                    if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                    {
                        Session.UrlsWithErrors.Add(webDirectory.Url);
                    }

                    if (webDirectory.ParentDirectory?.Url != null)
                    {
                        Logger.Error($"Skipped processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory.Url}'");
                    }
                    else
                    {
                        // Cancellation on the root directory means the whole session failed.
                        Logger.Error($"Skipped processing Url: '{webDirectory.Url}'");
                        Session.Root.Error = true;
                    }
                }
                else
                {
                    Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");
                }
            }
            finally
            {
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo.Remove(name);
                }

                // A directory with a CancellationReason was requeued, so it is not finished yet.
                if (string.IsNullOrWhiteSpace(webDirectory.CancellationReason))
                {
                    webDirectory.Finished = true;
                    webDirectory.FinishTime = DateTimeOffset.UtcNow;
                }
            }
        }

        Interlocked.Decrement(ref RunningWebDirectoryThreads);

        // Needed, because of the TryDequeue, no waiting in ConcurrentQueue!
        if (queue.IsEmpty)
        {
            // Don't hog the CPU when queue < threads
            await Task.Delay(TimeSpan.FromMilliseconds(1000), cancellationToken);
        }
        else
        {
            await Task.Delay(TimeSpan.FromMilliseconds(10), cancellationToken);
        }
    } while (!cancellationToken.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0) && !maxConnections);

    Logger.Debug($"Finished [{name}]");
}
/// <summary>
/// Entry point for an indexing run. Loads a saved session from file (stats only) or
/// builds a new session from the configured URL, then spins up the directory and
/// file-size worker tasks and, when they finish, saves/uploads URLs, runs the optional
/// speedtest, and writes session statistics and JSON.
/// NOTE(review): `async void` is kept because callers invoke this fire-and-forget as an
/// event-handler-style entry point; exceptions are contained by the internal try/catch.
/// </summary>
public async void StartIndexingAsync()
{
    bool fromFile = !string.IsNullOrWhiteSpace(OpenDirectoryIndexerSettings.FileName);

    if (fromFile)
    {
        // Only show statistics for a previously saved session, then exit.
        Session = Library.LoadSessionJson(OpenDirectoryIndexerSettings.FileName);
        Console.WriteLine(Statistics.GetSessionStats(Session, includeExtensions: true));
        Console.ReadKey(intercept: true);
        return;
    }
    else
    {
        Session = new Session
        {
            Started = DateTimeOffset.UtcNow,
            Root = new WebDirectory(parentWebDirectory: null)
            {
                Name = Constants.Root,
                Url = OpenDirectoryIndexerSettings.Url
            },
            // MaxThreads is set once here; the redundant re-assignment after this
            // block was removed (the fromFile path returns early, so this branch
            // always runs when execution continues).
            MaxThreads = OpenDirectoryIndexerSettings.Threads
        };
    }

    if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
    {
        Logger.Warn("Google Drive scanning is limited to 9 directories per second!");
    }

    if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
    {
        Logger.Warn("Retrieving FTP(S) software!");

        if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
        {
            if (Session.Root.Uri.Port == -1)
            {
                Logger.Warn("Using default port (990) for FTPS");

                UriBuilder uriBuilder = new UriBuilder(Session.Root.Uri)
                {
                    Port = 990
                };

                Session.Root.Url = uriBuilder.Uri.ToString();
            }
        }

        string serverInfo = await FtpParser.GetFtpServerInfo(Session.Root, OpenDirectoryIndexerSettings.Username, OpenDirectoryIndexerSettings.Password);

        if (string.IsNullOrWhiteSpace(serverInfo))
        {
            serverInfo = "Failed or no server info available.";
        }
        else
        {
            // Remove IP from server info.
            // BUGFIX: Regex.Replace returns a new string; the result was previously
            // discarded, so the IP address was never actually redacted.
            serverInfo = Regex.Replace(serverInfo, @"(Connected to )(\d*\.\d*.\d*.\d*)", "$1IP Address");

            Session.Description = $"FTP INFO{Environment.NewLine}{serverInfo}";
        }

        Logger.Warn(serverInfo);
    }

    TimerStatistics = new System.Timers.Timer
    {
        Enabled = true,
        Interval = TimeSpan.FromSeconds(30).TotalMilliseconds
    };

    TimerStatistics.Elapsed += TimerStatistics_Elapsed;

    IndexingTask = Task.Run(async () =>
    {
        try
        {
            WebDirectoriesQueue = new ConcurrentQueue<WebDirectory>();

            if (fromFile)
            {
                SetParentDirectories(Session.Root);

                // TODO: Add unfinished items to queue, very complicated, we need to ALSO fill the ParentDirectory...
                //// With filter predicate, with selection function
                //var flatList = nodes.Flatten(n => n.IsDeleted == false, n => n.Children);
                //var directoriesToDo = Session.Root.Subdirectories.Flatten(null, wd => wd.Subdirectories).Where(wd => !wd.Finished);
            }
            else
            {
                // Add root
                WebDirectoriesQueue.Enqueue(Session.Root);
            }

            IndexingTaskCTS = new CancellationTokenSource();

            for (int i = 1; i <= WebDirectoryProcessors.Length; i++)
            {
                string processorId = i.ToString();
                WebDirectoryProcessors[i - 1] = WebDirectoryProcessor(WebDirectoriesQueue, $"Processor {processorId}", IndexingTaskCTS.Token);
            }

            for (int i = 1; i <= WebFileFileSizeProcessors.Length; i++)
            {
                string processorId = i.ToString();
                WebFileFileSizeProcessors[i - 1] = WebFileFileSizeProcessor(WebFilesFileSizeQueue, $"Processor {processorId}", WebDirectoryProcessors, IndexingTaskCTS.Token);
            }

            await Task.WhenAll(WebDirectoryProcessors);
            Console.WriteLine("Finished indexing"); // typo fix: was "Finshed indexing"
            Logger.Info("Finished indexing");

            if (WebFilesFileSizeQueue.Any())
            {
                // Report more often while only file sizes are being fetched.
                TimerStatistics.Interval = TimeSpan.FromSeconds(5).TotalMilliseconds;
                Console.WriteLine($"Retrieving filesize of {WebFilesFileSizeQueue.Count} urls");
            }

            await Task.WhenAll(WebFileFileSizeProcessors);

            TimerStatistics.Stop();

            Session.Finished = DateTimeOffset.UtcNow;
            Session.TotalFiles = Session.Root.TotalFiles;
            Session.TotalFileSizeEstimated = Session.Root.TotalFileSize;

            // Materialize once: the deferred LINQ query was previously enumerated for
            // every Count() call and again when writing the file.
            List<string> distinctUrls = Session.Root.AllFileUrls.Distinct().ToList();

            if (Session.TotalFiles != distinctUrls.Count)
            {
                Logger.Error($"Indexed files and unique files is not the same, please check results. Found a total of {Session.TotalFiles} files resulting in {distinctUrls.Count} urls");
            }

            if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain)
            {
                if (Session.TotalFiles > 0)
                {
                    Logger.Info("Saving URL list to file...");
                    Console.WriteLine("Saving URL list to file...");

                    string scansPath = Library.GetScansPath();

                    try
                    {
                        string urlsFileName = $"{Library.CleanUriToFilename(Session.Root.Uri)}.txt";
                        string urlsPath = Path.Combine(scansPath, urlsFileName);
                        File.WriteAllLines(urlsPath, distinctUrls);

                        Logger.Info($"Saved URL list to file: {urlsFileName}");
                        Console.WriteLine($"Saved URL list to file: {urlsFileName}");

                        if (OpenDirectoryIndexerSettings.CommandLineOptions.UploadUrls && Session.TotalFiles > 0)
                        {
                            Console.WriteLine($"Uploading URLs ({FileSizeHelper.ToHumanReadable(new FileInfo(urlsPath).Length)})...");

                            bool uploadSucceeded = false;

                            try
                            {
                                GoFileIoFile uploadedFile = await GoFileIo.UploadFile(HttpClient, urlsPath);
                                HistoryLogger.Info($"goFile.io: {JsonConvert.SerializeObject(uploadedFile)}");
                                Session.UploadedUrlsUrl = uploadedFile.Url.ToString();
                                uploadSucceeded = true;
                                Console.WriteLine($"Uploaded URLs link: {Session.UploadedUrlsUrl}");
                            }
                            catch (Exception ex)
                            {
                                Logger.Warn($"Error uploading URLs: {ex.Message}");
                            }

                            if (!uploadSucceeded)
                            {
                                // Secondary upload service when goFile.io fails.
                                Logger.Warn($"Using fallback for uploading URLs file.");

                                try
                                {
                                    UploadFilesIoFile uploadedFile = await UploadFilesIo.UploadFile(HttpClient, urlsPath);
                                    HistoryLogger.Info($"UploadFiles.io: {JsonConvert.SerializeObject(uploadedFile)}");
                                    Session.UploadedUrlsUrl = uploadedFile.Url.ToString();
                                    uploadSucceeded = true;
                                    Console.WriteLine($"Uploaded URLs link: {Session.UploadedUrlsUrl}");
                                }
                                catch (Exception ex)
                                {
                                    Logger.Warn($"Error uploading URLs: {ex.Message}");
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }
                }
                else
                {
                    Logger.Info("No URLs to save");
                    Console.WriteLine("No URLs to save");
                }
            }

            distinctUrls = null;

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain)
            {
                if (Session.TotalFiles > 0)
                {
                    if (Session.Root.Uri.Scheme == Constants.UriScheme.Http || Session.Root.Uri.Scheme == Constants.UriScheme.Https)
                    {
                        try
                        {
                            WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                            Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                            Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");
                            Session.SpeedtestResult = await Library.DoSpeedTestHttpAsync(HttpClient, biggestFile.Url);

                            if (Session.SpeedtestResult != null)
                            {
                                Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMilliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                            }
                        }
                        catch (Exception ex)
                        {
                            // Give empty speedtest, so it will be reported as Failed
                            Session.SpeedtestResult = new Shared.SpeedtestResult();
                            Logger.Error(ex, "Speedtest failed");
                        }
                    }
                    else if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                    {
                        try
                        {
                            // Reuse one still-connected client for the speedtest; close the rest.
                            FluentFTP.FtpClient ftpClient = FtpParser.FtpClients.FirstOrDefault(c => c.Value.IsConnected).Value;

                            FtpParser.CloseAll(exceptFtpClient: ftpClient);

                            if (ftpClient != null)
                            {
                                WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                                Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                                Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");

                                Session.SpeedtestResult = await Library.DoSpeedTestFtpAsync(ftpClient, biggestFile.Url);

                                if (Session.SpeedtestResult != null)
                                {
                                    Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMilliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                                }
                            }
                            else
                            {
                                Console.WriteLine($"Cannot do speedtest because there is no connected FTP client anymore");
                            }
                        }
                        catch (Exception ex)
                        {
                            // Give empty speedtest, so it will be reported as Failed
                            Session.SpeedtestResult = new Shared.SpeedtestResult();
                            Logger.Error(ex, "Speedtest failed");
                        }
                    }
                }
            }

            if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
            {
                FtpParser.CloseAll();
            }

            Logger.Info("Logging sessions stats...");

            try
            {
                string sessionStats = Statistics.GetSessionStats(Session, includeExtensions: true, includeBanner: true);
                Logger.Info(sessionStats);
                HistoryLogger.Info(sessionStats);
                Logger.Info("Logged sessions stats");

                if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoReddit)
                {
                    // Also log to screen, when saving links or JSON fails and the logs keep filling by other sessions, this will be saved
                    Console.WriteLine(sessionStats);
                }
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
            }

            if (Session.UrlsWithErrors.Any())
            {
                Logger.Info("URLs with errors:");
                Console.WriteLine("URLs with errors:");

                foreach (string urlWithError in Session.UrlsWithErrors.OrderBy(u => u))
                {
                    Logger.Info(urlWithError);
                    Console.WriteLine(urlWithError);
                }
            }

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Json)
            {
                Logger.Info("Save session to JSON");
                Console.WriteLine("Save session to JSON");

                try
                {
                    Library.SaveSessionJson(Session);
                    Logger.Info($"Saved session: {Library.CleanUriToFilename(Session.Root.Uri)}.json");
                    Console.WriteLine($"Saved session: {Library.CleanUriToFilename(Session.Root.Uri)}.json");
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                }
            }

            Logger.Info("Finished indexing!");
            Console.WriteLine("Finished indexing!");

            Program.SetConsoleTitle($"✔ {Program.ConsoleTitle}");

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Quit)
            {
                Command.KillApplication();
            }
            else
            {
                Console.WriteLine("Press ESC to exit! Or C to copy to clipboard and quit!");
            }
        }
        catch (Exception ex)
        {
            Logger.Error(ex);
        }
    });
}
/// <summary>
/// Worker loop that drains <paramref name="queue"/> until cancellation is requested
/// and no work remains (queue empty and no sibling worker running). Each dequeued
/// directory is dispatched to the FTP parser, the Google Drive indexer, or the HTTP
/// processor depending on the session's root URI.
/// </summary>
/// <param name="queue">Shared work queue of directories still to be indexed.</param>
/// <param name="name">Display name of this worker, used for logging and bookkeeping.</param>
/// <param name="cancellationToken">Cooperative cancellation for the whole indexing run.</param>
private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, string name, CancellationToken cancellationToken)
{
    Logger.Debug($"Start [{name}]");

    do
    {
        Interlocked.Increment(ref RunningWebDirectoryThreads);

        if (queue.TryDequeue(out WebDirectory webDirectory))
        {
            try
            {
                // Publish which directory this worker is handling (read elsewhere for status reporting).
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo[name] = webDirectory;
                }

                if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                {
                    Session.ProcessedUrls.Add(webDirectory.Url);
                    webDirectory.StartTime = DateTimeOffset.UtcNow;

                    Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                    if (Session.Root.Uri.Scheme == "ftp")
                    {
                        WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory);
                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
                    {
                        // The Google Drive indexer rewrites the URL; restore the original afterwards.
                        string baseUrl = webDirectory.Url;
                        WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);
                        parsedWebDirectory.Url = baseUrl;

                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else
                    {
                        if (SameHostAndDirectory(Session.Root.Uri, webDirectory.Uri))
                        {
                            Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                            Session.TotalHttpRequests++;

                            // Hard 5-minute cap per directory, independent of the outer cancellation token.
                            CancellationTokenSource cancellationTokenSource = new CancellationTokenSource();
                            cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(5));

                            // Context entries are read by the retry policy's onRetry callback.
                            Context pollyContext = new Context
                            {
                                { "Processor", name },
                                { "WebDirectory", webDirectory },
                                { "CancellationTokenSource", cancellationTokenSource }
                            };

                            await RetryPolicy.ExecuteAsync(async (context, token) =>
                            {
                                await ProcessWebDirectoryAsync(name, webDirectory, cancellationTokenSource.Token);
                            }, pollyContext, cancellationTokenSource.Token);
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");
                            Session.Skipped++;
                        }
                    }

                    Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                }
                else
                {
                    //Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                }
            }
            catch (Exception ex)
            {
                // TaskCanceledException here means the per-directory timeout or outer
                // cancellation fired; logged as a warning rather than an error.
                if (ex is TaskCanceledException taskCanceledException)
                {
                    if (webDirectory.ParentDirectory?.Url != null)
                    {
                        Logger.Warn($"Skipped processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory.Url}'");
                    }
                    else
                    {
                        // Cancellation on the root directory means the whole session failed.
                        Logger.Warn($"Skipped processing Url: '{webDirectory.Url}'");
                        Session.Root.Error = true;
                    }
                }
                else
                {
                    Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");
                }

                Session.Errors++;

                if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                {
                    Session.UrlsWithErrors.Add(webDirectory.Url);
                }
            }
            finally
            {
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo.Remove(name);
                }

                // Marked finished even on error/cancellation, so the session can complete.
                webDirectory.Finished = true;
                webDirectory.FinishTime = DateTimeOffset.UtcNow;
            }
        }

        Interlocked.Decrement(ref RunningWebDirectoryThreads);

        // Needed! TryDequeue does not block, so yield briefly between polls.
        await Task.Delay(TimeSpan.FromMilliseconds(10));
    } while (!cancellationToken.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0));

    Logger.Debug($"Finished [{name}]");
}
/// <summary>
/// Entry point for an indexing run. Loads a saved session from file (stats only) or
/// builds a new session from the configured URL, then starts the directory and
/// file-size worker tasks and, when they finish, saves/uploads the URL list, runs
/// the optional speedtest, and writes session statistics and JSON.
/// NOTE(review): `async void` — exceptions escaping the awaits before Task.Run are
/// unobservable; presumably this is invoked fire-and-forget as an event-style entry
/// point. Verify against callers.
/// </summary>
public async void StartIndexingAsync()
{
    bool fromFile = !string.IsNullOrWhiteSpace(OpenDirectoryIndexerSettings.FileName);

    if (fromFile)
    {
        // Only show statistics for a previously saved session, then exit.
        Session = Library.LoadSessionJson(OpenDirectoryIndexerSettings.FileName);
        Console.WriteLine(Statistics.GetSessionStats(Session, includeExtensions: true));
        Console.ReadKey(intercept: true);
        return;
    }
    else
    {
        Session = new Session
        {
            Started = DateTimeOffset.UtcNow,
            Root = new WebDirectory(parentWebDirectory: null)
            {
                Name = "ROOT",
                Url = OpenDirectoryIndexerSettings.Url
            }
        };
    }

    if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
    {
        Logger.Warn("Google Drive scanning is limited to 9 directories per second!");
    }

    if (Session.Root.Uri.Scheme == "ftp")
    {
        Logger.Warn("Retrieving FTP software!");
        // TODO: Replace with library?
        Logger.Warn(await FtpParser.GetFtpServerInfo(Session.Root));
        //AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
    }

    // Periodic statistics reporting while indexing runs.
    TimerStatistics = new System.Timers.Timer
    {
        Enabled = true,
        Interval = TimeSpan.FromSeconds(30).TotalMilliseconds
    };

    TimerStatistics.Elapsed += TimerStatistics_Elapsed;

    IndexingTask = Task.Run(async () =>
    {
        try
        {
            WebDirectoriesQueue = new ConcurrentQueue<WebDirectory>();

            if (fromFile)
            {
                SetParentDirectories(Session.Root);

                // TODO: Add unfinished items to queue, very complicated, we need to ALSO fill the ParentDirectory...
                //// With filter predicate, with selection function
                //var flatList = nodes.Flatten(n => n.IsDeleted == false, n => n.Children);
                //var directoriesToDo = Session.Root.Subdirectories.Flatten(null, wd => wd.Subdirectories).Where(wd => !wd.Finished);
            }
            else
            {
                // Add root
                WebDirectoriesQueue.Enqueue(Session.Root);
            }

            IndexingTaskCTS = new CancellationTokenSource();

            // Start the directory workers (1-based ids for display only).
            for (int i = 1; i <= WebDirectoryProcessors.Length; i++)
            {
                string processorId = i.ToString();
                WebDirectoryProcessors[i - 1] = WebDirectoryProcessor(WebDirectoriesQueue, $"Processor {processorId}", IndexingTaskCTS.Token);
            }

            // Start the file-size workers; they also watch the directory workers to know when to stop.
            for (int i = 1; i <= WebFileFileSizeProcessors.Length; i++)
            {
                string processorId = i.ToString();
                WebFileFileSizeProcessors[i - 1] = WebFileFileSizeProcessor(WebFilesFileSizeQueue, $"Processor {processorId}", IndexingTaskCTS.Token, WebDirectoryProcessors);
            }

            await Task.WhenAll(WebDirectoryProcessors);
            Console.WriteLine("Finshed indexing");
            Logger.Info("Finshed indexing");

            if (Session.Root.Uri.Scheme == "ftp")
            {
                FtpParser.CloseAll();
            }

            if (WebFilesFileSizeQueue.Any())
            {
                // Report more often while only file sizes are being fetched.
                TimerStatistics.Interval = TimeSpan.FromSeconds(5).TotalMilliseconds;
                Console.WriteLine($"Retrieving filesize of {WebFilesFileSizeQueue.Count} urls");
            }

            await Task.WhenAll(WebFileFileSizeProcessors);

            TimerStatistics.Stop();

            Session.Finished = DateTimeOffset.UtcNow;
            Session.TotalFiles = Session.Root.TotalFiles;
            Session.TotalFileSizeEstimated = Session.Root.TotalFileSize;

            if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls && Session.Root.Uri.Host != Constants.GoogleDriveDomain)
            {
                if (Session.TotalFiles > 0)
                {
                    Logger.Info("Saving URL list to file...");
                    Console.WriteLine("Saving URL list to file...");

                    string scansPath = Library.GetScansPath();

                    try
                    {
                        string fileUrls = string.Join(Environment.NewLine, Session.Root.AllFileUrls.Distinct());
                        string urlsFileName = $"{Library.CleanUriToFilename(Session.Root.Uri)}.txt";
                        string urlsPath = Path.Combine(scansPath, urlsFileName);
                        Logger.Info("String joined");
                        File.WriteAllText(urlsPath, fileUrls);
                        Logger.Info($"Saved URL list to file: {urlsFileName}");
                        Console.WriteLine($"Saved URL list to file: {urlsFileName}");

                        if (OpenDirectoryIndexerSettings.CommandLineOptions.UploadUrls && Session.TotalFiles > 0)
                        {
                            Console.WriteLine("Uploading URLs...");

                            //UploadFilesFile uploadFilesFile = await UploadFileIo.UploadFile(HttpClient, urlsPath);
                            //HistoryLogger.Info($"uploadfiles.io: {JsonConvert.SerializeObject(uploadFilesFile)}");
                            //Session.UploadedUrlsUrl = uploadFilesFile.Url.ToString();

                            GoFilesFile uploadedFile = await GoFileIo.UploadFile(HttpClient, urlsPath);
                            HistoryLogger.Info($"goFile.io: {JsonConvert.SerializeObject(uploadedFile)}");
                            Session.UploadedUrlsUrl = uploadedFile.Url.ToString();

                            Console.WriteLine($"Uploaded URLs: {Session.UploadedUrlsUrl}");
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }
                }
                else
                {
                    Logger.Info("No URLs to save");
                    Console.WriteLine("No URLs to save");
                }
            }

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest && Session.Root.Uri.Host != Constants.GoogleDriveDomain)
            {
                if (Session.TotalFiles > 0)
                {
                    if (Session.Root.Uri.Scheme == "https" || Session.Root.Uri.Scheme == "http")
                    {
                        try
                        {
                            // Use the biggest indexed file so the download can saturate the connection.
                            WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                            Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                            Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");
                            Session.SpeedtestResult = await Library.DoSpeedTestAsync(HttpClient, biggestFile.Url);
                            Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMiliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                        }
                        catch (Exception ex)
                        {
                            Logger.Error(ex, "Speedtest failed");
                        }
                    }
                    else
                    {
                        Logger.Warn($"Only a speedtest for HTTP(S), not '{Session.Root.Uri.Scheme}'");
                    }
                }
            }

            Logger.Info("Logging sessions stats...");

            try
            {
                string sessionStats = Statistics.GetSessionStats(Session, includeExtensions: true);
                Logger.Info(sessionStats);
                HistoryLogger.Info(sessionStats);
                Logger.Info("Logged sessions stats");

                if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoReddit)
                {
                    // Also log to screen, when saving links or JSON fails and the logs keep filling by other sessions, this will be saved
                    Console.WriteLine(sessionStats);
                }
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
            }

            if (Session.UrlsWithErrors.Any())
            {
                Logger.Info("URLs with errors:");
                Console.WriteLine("URLs with errors:");

                foreach (string urlWithError in Session.UrlsWithErrors.OrderBy(u => u))
                {
                    Logger.Info(urlWithError);
                    Console.WriteLine(urlWithError);
                }
            }

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Json)
            {
                Logger.Info("Save session to JSON");
                Console.WriteLine("Save session to JSON");

                try
                {
                    Library.SaveSessionJson(Session);
                    Logger.Info($"Saved session: {PathHelper.GetValidPath(Session.Root.Url)}.json");
                    Console.WriteLine($"Saved session: {PathHelper.GetValidPath(Session.Root.Url)}.json");
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                }
            }

            Logger.Info("Finished indexing!");
            Console.WriteLine("Finished indexing!");

            Program.SetConsoleTitle($"✔ {Program.ConsoleTitle}");

            if (OpenDirectoryIndexerSettings.CommandLineOptions.Quit)
            {
                Command.KillApplication();
            }
            else
            {
                Console.WriteLine("Press ESC to exit! Or C to copy to clipboard and quit!");
            }
        }
        catch (Exception ex)
        {
            Logger.Error(ex);
        }
    });
}
/// <summary>
/// Worker loop that drains <paramref name="queue"/> until cancellation is requested
/// and no work remains. HTTP directories are downloaded inline here (with User-Agent
/// fallbacks and Calibre detection); FTP and Google Drive are delegated to their parsers.
/// </summary>
/// <param name="queue">Shared work queue of directories still to be indexed.</param>
/// <param name="name">Display name of this worker, used for logging and bookkeeping.</param>
/// <param name="token">Cooperative cancellation for the whole indexing run.</param>
private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, string name, CancellationToken token)
{
    Logger.Debug($"Start [{name}]");

    do
    {
        Interlocked.Increment(ref RunningWebDirectoryThreads);

        if (queue.TryDequeue(out WebDirectory webDirectory))
        {
            try
            {
                // Publish which directory this worker is handling (read elsewhere for status reporting).
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo[name] = webDirectory;
                }

                if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                {
                    Session.ProcessedUrls.Add(webDirectory.Url);

                    Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                    if (Session.Root.Uri.Scheme == "ftp")
                    {
                        WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory);
                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else if (Session.Root.Uri.Host == "drive.google.com")
                    {
                        // The Google Drive indexer rewrites the URL; restore the original afterwards.
                        string baseUrl = webDirectory.Url;
                        WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);
                        parsedWebDirectory.Url = baseUrl;

                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else
                    {
                        if (webDirectory.Uri.Host == Session.Root.Uri.Host && webDirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))
                        {
                            Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                            Session.TotalHttpRequests++;

                            await RetryPolicy.ExecuteAsync(async () =>
                            {
                                webDirectory.StartTime = DateTimeOffset.UtcNow;
                                HttpResponseMessage httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

                                string html = null;

                                if (httpResponseMessage.IsSuccessStatusCode)
                                {
                                    html = await GetHtml(httpResponseMessage);
                                }

                                // Some servers only answer well-known User-Agents; retry the very
                                // first request with a Curl, then a Chrome User-Agent.
                                if ((FirstRequest && !httpResponseMessage.IsSuccessStatusCode) || (httpResponseMessage.IsSuccessStatusCode && string.IsNullOrWhiteSpace(html)))
                                {
                                    Logger.Warn("First request fails, using Curl fallback User-Agent");
                                    HttpClient.DefaultRequestHeaders.UserAgent.Clear();
                                    HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Curl);
                                    httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

                                    if (httpResponseMessage.IsSuccessStatusCode)
                                    {
                                        html = await GetHtml(httpResponseMessage);
                                        Logger.Warn("Yes, this Curl User-Agent did the trick!");
                                    }
                                }

                                if ((FirstRequest && !httpResponseMessage.IsSuccessStatusCode) || (httpResponseMessage.IsSuccessStatusCode && string.IsNullOrWhiteSpace(html)))
                                {
                                    Logger.Warn("First request fails, using Chrome fallback User-Agent");
                                    HttpClient.DefaultRequestHeaders.UserAgent.Clear();
                                    HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Chrome);
                                    httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

                                    if (httpResponseMessage.IsSuccessStatusCode)
                                    {
                                        html = await GetHtml(httpResponseMessage);
                                        Logger.Warn("Yes, the Chrome User-Agent did the trick!");
                                    }
                                }

                                bool calibreDetected = false;
                                string calibreVersionString = string.Empty;

                                if (httpResponseMessage.IsSuccessStatusCode)
                                {
                                    FirstRequest = false;

                                    List<string> serverHeaders = new List<string>();

                                    if (httpResponseMessage.Headers.Contains("Server"))
                                    {
                                        serverHeaders = httpResponseMessage.Headers.GetValues("Server").ToList();
                                        calibreDetected = serverHeaders.Any(h => h.Contains("calibre"));
                                    }

                                    if (calibreDetected)
                                    {
                                        string serverHeader = string.Join("/", serverHeaders);
                                        calibreVersionString = serverHeader;
                                    }
                                    else
                                    {
                                        if (html == null)
                                        {
                                            html = await GetHtml(httpResponseMessage);
                                        }

                                        // UNTESTED (cannot find or down Calibre with this issue)
                                        const string calibreVersionIdentifier = "CALIBRE_VERSION = \"";
                                        calibreDetected = html?.Contains(calibreVersionIdentifier) == true;

                                        if (calibreDetected)
                                        {
                                            // BUGFIX: the previous code passed the index of the closing
                                            // quote as the *length* argument of Substring and started at
                                            // the identifier itself, producing a wrong version string (or
                                            // throwing). Extract exactly the text between the quotes.
                                            int versionStart = html.IndexOf(calibreVersionIdentifier) + calibreVersionIdentifier.Length;
                                            int versionEnd = html.IndexOf('"', versionStart);
                                            calibreVersionString = versionEnd >= 0 ? html.Substring(versionStart, versionEnd - versionStart) : html.Substring(versionStart);
                                        }
                                    }
                                }

                                if (calibreDetected)
                                {
                                    Version calibreVersion = CalibreParser.ParseVersion(calibreVersionString);

                                    Console.WriteLine($"Calibre {calibreVersion} detected! I will index it at max 100 books per 30 seconds, else it will break Calibre...");
                                    Logger.Info($"Calibre {calibreVersion} detected! I will index it at max 100 books per 30 seconds, else it will break Calibre...");

                                    await CalibreParser.ParseCalibre(HttpClient, httpResponseMessage.RequestMessage.RequestUri, webDirectory, calibreVersion);

                                    return;
                                }

                                Logger.Debug($"[{name}] Finish download '{webDirectory.Url}'");

                                // Process only same site
                                if (httpResponseMessage.RequestMessage.RequestUri.Host == Session.Root.Uri.Host)
                                {
                                    int httpStatusCode = (int)httpResponseMessage.StatusCode;

                                    if (!Session.HttpStatusCodes.ContainsKey(httpStatusCode))
                                    {
                                        Session.HttpStatusCodes[httpStatusCode] = 0;
                                    }

                                    Session.HttpStatusCodes[httpStatusCode]++;

                                    if (httpResponseMessage.IsSuccessStatusCode)
                                    {
                                        if (html == null)
                                        {
                                            html = await GetHtml(httpResponseMessage);
                                        }

                                        Session.TotalHttpTraffic += html.Length;

                                        WebDirectory parsedWebDirectory = await DirectoryParser.ParseHtml(webDirectory, html, HttpClient);
                                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                                    }
                                    else
                                    {
                                        Session.Errors++;
                                        webDirectory.Error = true;

                                        if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                                        {
                                            Session.UrlsWithErrors.Add(webDirectory.Url);
                                        }

                                        // Throws so the retry policy sees the failure.
                                        httpResponseMessage.EnsureSuccessStatusCode();
                                    }
                                }
                                else
                                {
                                    // Redirected off-site; do not index foreign hosts.
                                    Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' which points to '{httpResponseMessage.RequestMessage.RequestUri}'");
                                    Session.Skipped++;
                                }
                            });
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");
                            Session.Skipped++;
                        }
                    }

                    Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                }
                else
                {
                    Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                }
            }
            catch (Exception ex)
            {
                // BUGFIX: ParentDirectory is null for the root directory; the unguarded
                // access threw a NullReferenceException inside the catch handler and
                // masked the original error.
                Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");
                Session.Errors++;

                if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                {
                    Session.UrlsWithErrors.Add(webDirectory.Url);
                }
            }
            finally
            {
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo.Remove(name);
                }
            }
        }

        Interlocked.Decrement(ref RunningWebDirectoryThreads);

        // Needed! TryDequeue does not block, so yield briefly between polls.
        await Task.Delay(TimeSpan.FromMilliseconds(10));
    } while (!token.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0));

    Logger.Debug($"Finished [{name}]");
}
/// <summary>
/// Worker loop that drains <paramref name="queue"/> until cancellation is requested
/// and no work remains. HTTP directories are processed via the Polly retry policy;
/// FTP and Google Drive are delegated to their parsers.
/// </summary>
/// <param name="queue">Shared work queue of directories still to be indexed.</param>
/// <param name="name">Display name of this worker, used for logging and bookkeeping.</param>
/// <param name="token">Cooperative cancellation for the whole indexing run.</param>
private async Task WebDirectoryProcessor(ConcurrentQueue<WebDirectory> queue, string name, CancellationToken token)
{
    Logger.Debug($"Start [{name}]");

    do
    {
        Interlocked.Increment(ref RunningWebDirectoryThreads);

        if (queue.TryDequeue(out WebDirectory webDirectory))
        {
            try
            {
                // Publish which directory this worker is handling (read elsewhere for status reporting).
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo[name] = webDirectory;
                }

                if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                {
                    Session.ProcessedUrls.Add(webDirectory.Url);

                    Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                    if (Session.Root.Uri.Scheme == "ftp")
                    {
                        WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory);
                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else if (Session.Root.Uri.Host == "drive.google.com")
                    {
                        // The Google Drive indexer rewrites the URL; restore the original afterwards.
                        string baseUrl = webDirectory.Url;
                        WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);
                        parsedWebDirectory.Url = baseUrl;

                        AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                    }
                    else
                    {
                        if (webDirectory.Uri.Host == Session.Root.Uri.Host && webDirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))
                        {
                            Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                            Session.TotalHttpRequests++;

                            // Context entries are read by the retry policy's onRetry callback.
                            Context pollyContext = new Context();
                            pollyContext.Add("Processor", name);
                            pollyContext.Add("WebDirectory", webDirectory);

                            await RetryPolicy.ExecuteAsync(ctx => ProcessWebDirectoryAsync(name, webDirectory), pollyContext);
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");
                            Session.Skipped++;
                        }
                    }

                    Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                }
                else
                {
                    Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                }
            }
            catch (Exception ex)
            {
                // BUGFIX: ParentDirectory is null for the root directory (constructed with
                // parentWebDirectory: null); the unguarded access threw a
                // NullReferenceException inside the catch handler and masked the original error.
                Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");
                Session.Errors++;

                if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                {
                    Session.UrlsWithErrors.Add(webDirectory.Url);
                }
            }
            finally
            {
                lock (WebDirectoryProcessorInfoLock)
                {
                    WebDirectoryProcessorInfo.Remove(name);
                }
            }
        }

        Interlocked.Decrement(ref RunningWebDirectoryThreads);

        // Needed! TryDequeue does not block, so yield briefly between polls.
        await Task.Delay(TimeSpan.FromMilliseconds(10));
    } while (!token.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0));

    Logger.Debug($"Finished [{name}]");
}