コード例 #1
0
        /// <summary>
        /// Worker loop of a single directory processor. Repeatedly dequeues a <see cref="WebDirectory"/> from
        /// <paramref name="queue"/> and processes it as FTP(S), Google Drive or HTTP, depending on the session root.
        /// The loop ends when cancellation is requested, when the queue is empty while no processor is busy,
        /// or when the FTP server reported that its maximum number of connections was reached.
        /// </summary>
        /// <param name="queue">Queue holding the directories that still have to be processed.</param>
        /// <param name="name">Name of this processor; used in log messages and as key in the per-processor maps.</param>
        /// <param name="cancellationToken">Token checked by the loop condition and observed by the idle delays and the FTP disconnect.</param>
        /// <returns>A task that completes when this processor stops.</returns>
        private async Task WebDirectoryProcessor(ConcurrentQueue <WebDirectory> queue, string name, CancellationToken cancellationToken)
        {
            Logger.Debug($"Start [{name}]");

            // Set when the FTP server refuses further connections; stops this processor after the current iteration.
            bool maxConnections = false;

            do
            {
                // Count this processor as busy. Each iteration decrements the counter exactly once again,
                // after the try/catch/finally below.
                Interlocked.Increment(ref RunningWebDirectoryThreads);

                if (queue.TryDequeue(out WebDirectory webDirectory))
                {
                    try
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo[name] = webDirectory;
                        }

                        if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                        {
                            Session.ProcessedUrls.Add(webDirectory.Url);
                            webDirectory.StartTime = DateTimeOffset.UtcNow;

                            Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                            if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                            {
                                WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory, OpenDirectoryIndexerSettings.Username, OpenDirectoryIndexerSettings.Password);

                                if (webDirectory?.CancellationReason == Constants.Ftp_Max_Connections)
                                {
                                    webDirectory.CancellationReason = null;
                                    maxConnections = true;

                                    if (webDirectory.Name == Constants.Root)
                                    {
                                        webDirectory.Error = true;

                                        // Do NOT decrement RunningWebDirectoryThreads here: this exception is caught by the
                                        // catch block below, so the regular decrement after the try/catch/finally still runs.
                                        // Decrementing here as well would lower the counter twice for a single increment.
                                        throw new Exception("Error checking FTP because maximum connections reached");
                                    }

                                    // Requeue
                                    Session.ProcessedUrls.Remove(webDirectory.Url);
                                    queue.Enqueue(webDirectory);

                                    try
                                    {
                                        await FtpParser.FtpClients[name].DisconnectAsync(cancellationToken);

                                        lock (FtpParser.FtpClients)
                                        {
                                            FtpParser.FtpClients.Remove(name);
                                        }
                                    }
                                    catch (Exception exFtpDisconnect)
                                    {
                                        // Best effort: a failed disconnect must not prevent this processor from stopping.
                                        Logger.Error(exFtpDisconnect, "Error disconnecting FTP connection.");
                                    }
                                }

                                if (parsedWebDirectory != null)
                                {
                                    DirectoryParser.CheckParsedResults(parsedWebDirectory);
                                    AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                                }
                            }
                            else if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
                            {
                                // Remember the URL so it can be restored on the indexer's result.
                                string baseUrl = webDirectory.Url;

                                WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);

                                parsedWebDirectory.Url = baseUrl;

                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                            }
                            else
                            {
                                if (Session.Root.Uri.Host == Constants.BlitzfilesTechDomain || SameHostAndDirectory(Session.Root.Uri, webDirectory.Uri))
                                {
                                    Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                                    Session.TotalHttpRequests++;

                                    // Per-directory timeout: the request (including retries) is cancelled after five minutes.
                                    CancellationTokenSource cancellationTokenSource = new CancellationTokenSource();

                                    cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(5));

                                    Context pollyContext = new Context
                                    {
                                        { "Processor", name },
                                        { "WebDirectory", webDirectory },
                                        { "CancellationTokenSource", cancellationTokenSource }
                                    };

                                    await RetryPolicy.ExecuteAsync(async (context, token) => { await ProcessWebDirectoryAsync(name, webDirectory, cancellationTokenSource.Token); }, pollyContext, cancellationTokenSource.Token);
                                }
                                else
                                {
                                    Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");

                                    Session.Skipped++;
                                }
                            }

                            Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                        }
                        else
                        {
                            //Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                        }
                    }
                    catch (Exception ex)
                    {
                        if (ex is TaskCanceledException)
                        {
                            // Cancellation (e.g. the five minute timeout above) is recorded as an error for this URL.
                            Session.Errors++;
                            webDirectory.Error = true;

                            if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                            {
                                Session.UrlsWithErrors.Add(webDirectory.Url);
                            }

                            if (webDirectory.ParentDirectory?.Url != null)
                            {
                                Logger.Error($"Skipped processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory.Url}'");
                            }
                            else
                            {
                                // No parent URL available: flag the session root as failed.
                                Logger.Error($"Skipped processing Url: '{webDirectory.Url}'");
                                Session.Root.Error = true;
                            }
                        }
                        else
                        {
                            Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");
                        }
                    }
                    finally
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo.Remove(name);
                        }

                        // Only mark the directory as finished when no cancellation reason is set on it.
                        if (string.IsNullOrWhiteSpace(webDirectory.CancellationReason))
                        {
                            webDirectory.Finished   = true;
                            webDirectory.FinishTime = DateTimeOffset.UtcNow;
                        }
                    }
                }

                Interlocked.Decrement(ref RunningWebDirectoryThreads);

                // Needed, because of the TryDequeue, no waiting in ConcurrentQueue!
                if (queue.IsEmpty)
                {
                    // Don't hog the CPU when queue < threads
                    await Task.Delay(TimeSpan.FromMilliseconds(1000), cancellationToken);
                }
                else
                {
                    await Task.Delay(TimeSpan.FromMilliseconds(10), cancellationToken);
                }
            }while (!cancellationToken.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0) && !maxConnections);

            Logger.Debug($"Finished [{name}]");
        }
コード例 #2
0
        /// <summary>
        /// Starts an indexing session for the configured URL, or, when a session file name is configured,
        /// loads that session, prints its statistics, waits for a key press and returns.
        /// For a new session the method prepares the session object, retrieves FTP(S) server info when applicable,
        /// starts the statistics timer and launches <c>IndexingTask</c>, which runs the directory and file size
        /// processors and afterwards saves/uploads the URL list, runs the optional speedtest and logs the statistics.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>: callers cannot await it, and an exception thrown before
        /// <c>IndexingTask</c> is started (e.g. while retrieving the FTP server info) cannot be observed by the caller.
        /// Exceptions inside the indexing task itself are caught and logged.
        /// </remarks>
        public async void StartIndexingAsync()
        {
            bool fromFile = !string.IsNullOrWhiteSpace(OpenDirectoryIndexerSettings.FileName);

            if (fromFile)
            {
                // Only show the statistics of a previously saved session; nothing is indexed.
                Session = Library.LoadSessionJson(OpenDirectoryIndexerSettings.FileName);
                Console.WriteLine(Statistics.GetSessionStats(Session, includeExtensions: true));
                Console.ReadKey(intercept: true);
                return;
            }
            else
            {
                Session = new Session
                {
                    Started = DateTimeOffset.UtcNow,
                    Root    = new WebDirectory(parentWebDirectory: null)
                    {
                        Name = Constants.Root,
                        Url  = OpenDirectoryIndexerSettings.Url
                    },
                    MaxThreads = OpenDirectoryIndexerSettings.Threads
                };
            }

            if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
            {
                Logger.Warn("Google Drive scanning is limited to 9 directories per second!");
            }

            if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
            {
                Logger.Warn("Retrieving FTP(S) software!");

                if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                {
                    // A port of -1 means the URL did not specify one.
                    if (Session.Root.Uri.Port == -1)
                    {
                        Logger.Warn("Using default port (990) for FTPS");

                        UriBuilder uriBuilder = new UriBuilder(Session.Root.Uri)
                        {
                            Port = 990
                        };

                        Session.Root.Url = uriBuilder.Uri.ToString();
                    }
                }

                string serverInfo = await FtpParser.GetFtpServerInfo(Session.Root, OpenDirectoryIndexerSettings.Username, OpenDirectoryIndexerSettings.Password);

                if (string.IsNullOrWhiteSpace(serverInfo))
                {
                    serverInfo = "Failed or no server info available.";
                }
                else
                {
                    // Remove IP from server info.
                    // Regex.Replace returns a new string, so the result has to be assigned back;
                    // the dots are escaped so that only a dotted IPv4 address is matched.
                    serverInfo = Regex.Replace(serverInfo, @"(Connected to )(\d+\.\d+\.\d+\.\d+)", "$1IP Address");

                    Session.Description = $"FTP INFO{Environment.NewLine}{serverInfo}";
                }

                Logger.Warn(serverInfo);
            }

            TimerStatistics = new System.Timers.Timer
            {
                Enabled  = true,
                Interval = TimeSpan.FromSeconds(30).TotalMilliseconds
            };

            TimerStatistics.Elapsed += TimerStatistics_Elapsed;

            IndexingTask = Task.Run(async() =>
            {
                try
                {
                    WebDirectoriesQueue = new ConcurrentQueue <WebDirectory>();

                    // NOTE: fromFile is always false here, because the method returns early above when it is true.
                    if (fromFile)
                    {
                        SetParentDirectories(Session.Root);

                        // TODO: Add unfinished items to queue, very complicated, we need to ALSO fill the ParentDirectory...
                        //// With filter predicate, with selection function
                        //var flatList = nodes.Flatten(n => n.IsDeleted == false, n => n.Children);
                        //var directoriesToDo = Session.Root.Subdirectories.Flatten(null, wd => wd.Subdirectories).Where(wd => !wd.Finished);
                    }
                    else
                    {
                        // Add root
                        WebDirectoriesQueue.Enqueue(Session.Root);
                    }

                    IndexingTaskCTS = new CancellationTokenSource();

                    // Start all directory processors; names are 1-based ("Processor 1", "Processor 2", ...).
                    for (int i = 1; i <= WebDirectoryProcessors.Length; i++)
                    {
                        string processorId = i.ToString();

                        WebDirectoryProcessors[i - 1] = WebDirectoryProcessor(WebDirectoriesQueue, $"Processor {processorId}", IndexingTaskCTS.Token);
                    }

                    for (int i = 1; i <= WebFileFileSizeProcessors.Length; i++)
                    {
                        string processorId = i.ToString();

                        WebFileFileSizeProcessors[i - 1] = WebFileFileSizeProcessor(WebFilesFileSizeQueue, $"Processor {processorId}", WebDirectoryProcessors, IndexingTaskCTS.Token);
                    }

                    await Task.WhenAll(WebDirectoryProcessors);
                    Console.WriteLine("Finshed indexing");
                    Logger.Info("Finshed indexing");

                    if (WebFilesFileSizeQueue.Any())
                    {
                        // File sizes are still being retrieved: report statistics more often.
                        TimerStatistics.Interval = TimeSpan.FromSeconds(5).TotalMilliseconds;
                        Console.WriteLine($"Retrieving filesize of {WebFilesFileSizeQueue.Count} urls");
                    }

                    await Task.WhenAll(WebFileFileSizeProcessors);

                    TimerStatistics.Stop();

                    Session.Finished               = DateTimeOffset.UtcNow;
                    Session.TotalFiles             = Session.Root.TotalFiles;
                    Session.TotalFileSizeEstimated = Session.Root.TotalFileSize;

                    IEnumerable <string> distinctUrls = Session.Root.AllFileUrls.Distinct();

                    // Count once; distinctUrls is a lazy query and every Count() would enumerate it again.
                    int distinctUrlCount = distinctUrls.Count();

                    if (Session.TotalFiles != distinctUrlCount)
                    {
                        Logger.Error($"Indexed files and unique files is not the same, please check results. Found a total of {Session.TotalFiles} files resulting in {distinctUrlCount} urls");
                    }

                    if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain)
                    {
                        if (Session.TotalFiles > 0)
                        {
                            Logger.Info("Saving URL list to file...");
                            Console.WriteLine("Saving URL list to file...");

                            string scansPath = Library.GetScansPath();

                            try
                            {
                                string urlsFileName = $"{Library.CleanUriToFilename(Session.Root.Uri)}.txt";
                                string urlsPath     = Path.Combine(scansPath, urlsFileName);
                                File.WriteAllLines(urlsPath, distinctUrls);
                                Logger.Info($"Saved URL list to file: {urlsFileName}");
                                Console.WriteLine($"Saved URL list to file: {urlsFileName}");

                                if (OpenDirectoryIndexerSettings.CommandLineOptions.UploadUrls && Session.TotalFiles > 0)
                                {
                                    Console.WriteLine($"Uploading URLs ({FileSizeHelper.ToHumanReadable(new FileInfo(urlsPath).Length)})...");

                                    bool uploadSucceeded = false;

                                    try
                                    {
                                        GoFileIoFile uploadedFile = await GoFileIo.UploadFile(HttpClient, urlsPath);
                                        HistoryLogger.Info($"goFile.io: {JsonConvert.SerializeObject(uploadedFile)}");
                                        Session.UploadedUrlsUrl = uploadedFile.Url.ToString();
                                        uploadSucceeded         = true;

                                        Console.WriteLine($"Uploaded URLs link: {Session.UploadedUrlsUrl}");
                                    }
                                    catch (Exception ex)
                                    {
                                        Logger.Warn($"Error uploading URLs: {ex.Message}");
                                    }

                                    if (!uploadSucceeded)
                                    {
                                        // First upload service failed: try the second one.
                                        Logger.Warn($"Using fallback for uploading URLs file.");

                                        try
                                        {
                                            UploadFilesIoFile uploadedFile = await UploadFilesIo.UploadFile(HttpClient, urlsPath);
                                            HistoryLogger.Info($"UploadFiles.io: {JsonConvert.SerializeObject(uploadedFile)}");
                                            Session.UploadedUrlsUrl = uploadedFile.Url.ToString();
                                            uploadSucceeded         = true;

                                            Console.WriteLine($"Uploaded URLs link: {Session.UploadedUrlsUrl}");
                                        }
                                        catch (Exception ex)
                                        {
                                            Logger.Warn($"Error uploading URLs: {ex.Message}");
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                Logger.Error(ex);
                            }
                        }
                        else
                        {
                            Logger.Info("No URLs to save");
                            Console.WriteLine("No URLs to save");
                        }
                    }

                    // Drop the reference to the URL query; it is not needed anymore.
                    distinctUrls = null;

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest && Session.Root.Uri.Host != Constants.GoogleDriveDomain && Session.Root.Uri.Host != Constants.BlitzfilesTechDomain)
                    {
                        if (Session.TotalFiles > 0)
                        {
                            if (Session.Root.Uri.Scheme == Constants.UriScheme.Http || Session.Root.Uri.Scheme == Constants.UriScheme.Https)
                            {
                                try
                                {
                                    // The speedtest downloads the largest indexed file.
                                    WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                                    Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                                    Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");
                                    Session.SpeedtestResult = await Library.DoSpeedTestHttpAsync(HttpClient, biggestFile.Url);

                                    if (Session.SpeedtestResult != null)
                                    {
                                        Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMilliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // Give empty speedtest, so it will be reported as Failed
                                    Session.SpeedtestResult = new Shared.SpeedtestResult();
                                    Logger.Error(ex, "Speedtest failed");
                                }
                            }
                            else if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                            {
                                try
                                {
                                    // Reuse one still-connected FTP client for the speedtest and close all others.
                                    FluentFTP.FtpClient ftpClient = FtpParser.FtpClients.FirstOrDefault(c => c.Value.IsConnected).Value;

                                    FtpParser.CloseAll(exceptFtpClient: ftpClient);

                                    if (ftpClient != null)
                                    {
                                        WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                                        Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                                        Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");

                                        Session.SpeedtestResult = await Library.DoSpeedTestFtpAsync(ftpClient, biggestFile.Url);

                                        if (Session.SpeedtestResult != null)
                                        {
                                            Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMilliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                                        }
                                    }
                                    else
                                    {
                                        Console.WriteLine($"Cannot do speedtest because there is no connected FTP client anymore");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // Give empty speedtest, so it will be reported as Failed
                                    Session.SpeedtestResult = new Shared.SpeedtestResult();
                                    Logger.Error(ex, "Speedtest failed");
                                }
                            }
                        }
                    }

                    if (Session.Root.Uri.Scheme == Constants.UriScheme.Ftp || Session.Root.Uri.Scheme == Constants.UriScheme.Ftps)
                    {
                        FtpParser.CloseAll();
                    }

                    Logger.Info("Logging sessions stats...");
                    try
                    {
                        string sessionStats = Statistics.GetSessionStats(Session, includeExtensions: true, includeBanner: true);
                        Logger.Info(sessionStats);
                        HistoryLogger.Info(sessionStats);
                        Logger.Info("Logged sessions stats");

                        if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoReddit)
                        {
                            // Also log to screen, when saving links or JSON fails and the logs keep filling by other sessions, this will be saved
                            Console.WriteLine(sessionStats);
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }

                    if (Session.UrlsWithErrors.Any())
                    {
                        Logger.Info("URLs with errors:");
                        Console.WriteLine("URLs with errors:");

                        foreach (string urlWithError in Session.UrlsWithErrors.OrderBy(u => u))
                        {
                            Logger.Info(urlWithError);
                            Console.WriteLine(urlWithError);
                        }
                    }

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Json)
                    {
                        Logger.Info("Save session to JSON");
                        Console.WriteLine("Save session to JSON");

                        try
                        {
                            Library.SaveSessionJson(Session);
                            Logger.Info($"Saved session: {Library.CleanUriToFilename(Session.Root.Uri)}.json");
                            Console.WriteLine($"Saved session: {Library.CleanUriToFilename(Session.Root.Uri)}.json");
                        }
                        catch (Exception ex)
                        {
                            Logger.Error(ex);
                        }
                    }

                    Logger.Info("Finished indexing!");
                    Console.WriteLine("Finished indexing!");

                    Program.SetConsoleTitle($"✔ {Program.ConsoleTitle}");

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Quit)
                    {
                        Command.KillApplication();
                    }
                    else
                    {
                        Console.WriteLine("Press ESC to exit! Or C to copy to clipboard and quit!");
                    }
                }
                catch (Exception ex)
                {
                    // Last line of defense: an exception must not escape the background task unobserved.
                    Logger.Error(ex);
                }
            });
        }
コード例 #3
0
        /// <summary>
        /// Worker loop of a single directory processor. Each iteration tries to take one directory from
        /// <paramref name="queue"/> and processes it as FTP, Google Drive or HTTP, depending on the session root.
        /// The loop stops once cancellation is requested, or once the queue is empty and no processor is busy.
        /// </summary>
        /// <param name="queue">Queue holding the directories that still have to be processed.</param>
        /// <param name="name">Name of this processor; used in log messages and as key in the per-processor map.</param>
        /// <param name="cancellationToken">Token that is checked after every iteration.</param>
        /// <returns>A task that completes when this processor stops.</returns>
        private async Task WebDirectoryProcessor(ConcurrentQueue <WebDirectory> queue, string name, CancellationToken cancellationToken)
        {
            Logger.Debug($"Start [{name}]");

            while (true)
            {
                // Count this processor as busy for the duration of the iteration.
                Interlocked.Increment(ref RunningWebDirectoryThreads);

                if (queue.TryDequeue(out WebDirectory directory))
                {
                    try
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo[name] = directory;
                        }

                        bool alreadyProcessed = Session.ProcessedUrls.Contains(directory.Url);

                        // URLs that were processed before are skipped silently.
                        if (!alreadyProcessed)
                        {
                            Session.ProcessedUrls.Add(directory.Url);
                            directory.StartTime = DateTimeOffset.UtcNow;

                            Logger.Info($"[{name}] Begin processing {directory.Url}");

                            if (Session.Root.Uri.Scheme == "ftp")
                            {
                                WebDirectory ftpListing = await FtpParser.ParseFtpAsync(name, directory);

                                AddProcessedWebDirectory(directory, ftpListing);
                            }
                            else if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
                            {
                                // Remember the URL so it can be restored on the indexer's result.
                                string originalUrl = directory.Url;

                                WebDirectory driveListing = await GoogleDriveIndexer.IndexAsync(directory);

                                driveListing.Url = originalUrl;

                                AddProcessedWebDirectory(directory, driveListing);
                            }
                            else if (SameHostAndDirectory(Session.Root.Uri, directory.Uri))
                            {
                                Logger.Debug($"[{name}] Start download '{directory.Url}'");
                                Session.TotalHttpRequests++;

                                // Per-directory timeout: the request (including retries) is cancelled after five minutes.
                                CancellationTokenSource requestTimeout = new CancellationTokenSource();

                                requestTimeout.CancelAfter(TimeSpan.FromMinutes(5));

                                Context retryContext = new Context
                                {
                                    { "Processor", name },
                                    { "WebDirectory", directory },
                                    { "CancellationTokenSource", requestTimeout }
                                };

                                await RetryPolicy.ExecuteAsync(async (context, token) => { await ProcessWebDirectoryAsync(name, directory, requestTimeout.Token); }, retryContext, requestTimeout.Token);
                            }
                            else
                            {
                                Logger.Warn($"[{name}] Skipped result of '{directory.Url}' because it is not the same host or path");

                                Session.Skipped++;
                            }

                            Logger.Info($"[{name}] Finished processing {directory.Url}");
                        }
                    }
                    catch (Exception failure)
                    {
                        if (!(failure is TaskCanceledException))
                        {
                            Logger.Error(failure, $"Error processing Url: '{directory.Url}' from parent '{directory.ParentDirectory?.Url}'");
                        }
                        else if (directory.ParentDirectory?.Url == null)
                        {
                            // Cancelled and no parent URL available: flag the session root as failed.
                            Logger.Warn($"Skipped processing Url: '{directory.Url}'");
                            Session.Root.Error = true;
                        }
                        else
                        {
                            Logger.Warn($"Skipped processing Url: '{directory.Url}' from parent '{directory.ParentDirectory.Url}'");
                        }

                        // Every failure, cancelled or not, counts as an error for this URL.
                        Session.Errors++;

                        if (!Session.UrlsWithErrors.Contains(directory.Url))
                        {
                            Session.UrlsWithErrors.Add(directory.Url);
                        }
                    }
                    finally
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo.Remove(name);
                        }

                        directory.Finished   = true;
                        directory.FinishTime = DateTimeOffset.UtcNow;
                    }
                }

                Interlocked.Decrement(ref RunningWebDirectoryThreads);

                // Needed! TryDequeue never blocks, so pause briefly instead of spinning.
                await Task.Delay(10);

                // Stop when cancelled, or when there is nothing queued and no processor is busy anymore.
                if (cancellationToken.IsCancellationRequested || (queue.IsEmpty && RunningWebDirectoryThreads <= 0))
                {
                    break;
                }
            }

            Logger.Debug($"Finished [{name}]");
        }
コード例 #4
0
        /// <summary>
        /// Starts an indexing session for the configured URL. When a session file name is configured instead,
        /// that session is loaded, its statistics are printed and the method returns after a key press.
        /// </summary>
        /// <remarks>
        /// The crawl itself runs in the background task stored in <c>IndexingTask</c>: it starts the directory and
        /// file size processors, waits for them, and then optionally saves/uploads the URL list, runs a speedtest,
        /// logs the session statistics and saves the session as JSON, depending on the command line options.
        /// Because this method is <c>async void</c>, callers cannot await it or observe exceptions from it.
        /// </remarks>
        public async void StartIndexingAsync()
        {
            bool fromFile = !string.IsNullOrWhiteSpace(OpenDirectoryIndexerSettings.FileName);

            if (fromFile)
            {
                // A stored session is only displayed, it is not resumed
                Session = Library.LoadSessionJson(OpenDirectoryIndexerSettings.FileName);
                Console.WriteLine(Statistics.GetSessionStats(Session, includeExtensions: true));
                Console.ReadKey(intercept: true);
                return;
            }

            Session = new Session
            {
                Started = DateTimeOffset.UtcNow,
                Root    = new WebDirectory(parentWebDirectory: null)
                {
                    Name = "ROOT",
                    Url  = OpenDirectoryIndexerSettings.Url
                }
            };

            if (Session.Root.Uri.Host == Constants.GoogleDriveDomain)
            {
                Logger.Warn("Google Drive scanning is limited to 9 directories per second!");
            }

            if (Session.Root.Uri.Scheme == "ftp")
            {
                Logger.Warn("Retrieving FTP software!");

                try
                {
                    // TODO: Replace with library?
                    Logger.Warn(await FtpParser.GetFtpServerInfo(Session.Root));
                }
                catch (Exception ex)
                {
                    // The server info is only logged. An exception escaping an async void method cannot be
                    // observed by the caller, so log it here and continue with the indexing itself.
                    Logger.Error(ex, "Retrieving FTP software failed");
                }
            }

            TimerStatistics = new System.Timers.Timer
            {
                Interval = TimeSpan.FromSeconds(30).TotalMilliseconds
            };

            // Subscribe before starting, so the timer never runs without its handler
            TimerStatistics.Elapsed += TimerStatistics_Elapsed;
            TimerStatistics.Start();

            IndexingTask = Task.Run(async() =>
            {
                try
                {
                    WebDirectoriesQueue = new ConcurrentQueue <WebDirectory>();

                    // NOTE: fromFile is always false here because the file path returns early above,
                    // this branch is kept for the planned "resume from file" feature
                    if (fromFile)
                    {
                        SetParentDirectories(Session.Root);

                        // TODO: Add unfinished items to queue, very complicated, we need to ALSO fill the ParentDirectory...
                        //// With filter predicate, with selection function
                        //var flatList = nodes.Flatten(n => n.IsDeleted == false, n => n.Children);
                        //var directoriesToDo = Session.Root.Subdirectories.Flatten(null, wd => wd.Subdirectories).Where(wd => !wd.Finished);
                    }
                    else
                    {
                        // Add root
                        WebDirectoriesQueue.Enqueue(Session.Root);
                    }

                    IndexingTaskCTS = new CancellationTokenSource();

                    for (int i = 1; i <= WebDirectoryProcessors.Length; i++)
                    {
                        string processorId = i.ToString();

                        WebDirectoryProcessors[i - 1] = WebDirectoryProcessor(WebDirectoriesQueue, $"Processor {processorId}", IndexingTaskCTS.Token);
                    }

                    for (int i = 1; i <= WebFileFileSizeProcessors.Length; i++)
                    {
                        string processorId = i.ToString();

                        WebFileFileSizeProcessors[i - 1] = WebFileFileSizeProcessor(WebFilesFileSizeQueue, $"Processor {processorId}", IndexingTaskCTS.Token, WebDirectoryProcessors);
                    }

                    await Task.WhenAll(WebDirectoryProcessors);
                    Console.WriteLine("Finshed indexing");
                    Logger.Info("Finshed indexing");

                    if (Session.Root.Uri.Scheme == "ftp")
                    {
                        FtpParser.CloseAll();
                    }

                    if (WebFilesFileSizeQueue.Any())
                    {
                        // Report progress more often while only file sizes are being retrieved
                        TimerStatistics.Interval = TimeSpan.FromSeconds(5).TotalMilliseconds;
                        Console.WriteLine($"Retrieving filesize of {WebFilesFileSizeQueue.Count} urls");
                    }

                    await Task.WhenAll(WebFileFileSizeProcessors);

                    TimerStatistics.Stop();

                    Session.Finished               = DateTimeOffset.UtcNow;
                    Session.TotalFiles             = Session.Root.TotalFiles;
                    Session.TotalFileSizeEstimated = Session.Root.TotalFileSize;

                    if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoUrls && Session.Root.Uri.Host != Constants.GoogleDriveDomain)
                    {
                        if (Session.TotalFiles > 0)
                        {
                            Logger.Info("Saving URL list to file...");
                            Console.WriteLine("Saving URL list to file...");

                            string scansPath = Library.GetScansPath();

                            try
                            {
                                string fileUrls = string.Join(Environment.NewLine, Session.Root.AllFileUrls.Distinct());

                                string urlsFileName = $"{Library.CleanUriToFilename(Session.Root.Uri)}.txt";
                                string urlsPath     = Path.Combine(scansPath, urlsFileName);
                                Logger.Info("String joined");
                                File.WriteAllText(urlsPath, fileUrls);
                                Logger.Info($"Saved URL list to file: {urlsFileName}");
                                Console.WriteLine($"Saved URL list to file: {urlsFileName}");

                                // Session.TotalFiles > 0 is already guaranteed by the enclosing check
                                if (OpenDirectoryIndexerSettings.CommandLineOptions.UploadUrls)
                                {
                                    Console.WriteLine("Uploading URLs...");

                                    GoFilesFile uploadedFile = await GoFileIo.UploadFile(HttpClient, urlsPath);
                                    HistoryLogger.Info($"goFile.io: {JsonConvert.SerializeObject(uploadedFile)}");
                                    Session.UploadedUrlsUrl = uploadedFile.Url.ToString();

                                    Console.WriteLine($"Uploaded URLs: {Session.UploadedUrlsUrl}");
                                }
                            }
                            catch (Exception ex)
                            {
                                Logger.Error(ex);
                            }
                        }
                        else
                        {
                            Logger.Info("No URLs to save");
                            Console.WriteLine("No URLs to save");
                        }
                    }

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Speedtest && Session.Root.Uri.Host != Constants.GoogleDriveDomain)
                    {
                        if (Session.TotalFiles > 0)
                        {
                            if (Session.Root.Uri.Scheme == "https" || Session.Root.Uri.Scheme == "http")
                            {
                                try
                                {
                                    // The biggest file is used as the download target of the speedtest
                                    WebFile biggestFile = Session.Root.AllFiles.OrderByDescending(f => f.FileSize).First();

                                    Console.WriteLine($"Starting speedtest (10-25 seconds)...");
                                    Console.WriteLine($"Test file: {FileSizeHelper.ToHumanReadable(biggestFile.FileSize)} {biggestFile.Url}");
                                    Session.SpeedtestResult = await Library.DoSpeedTestAsync(HttpClient, biggestFile.Url);
                                    Console.WriteLine($"Finished speedtest. Downloaded: {FileSizeHelper.ToHumanReadable(Session.SpeedtestResult.DownloadedBytes)}, Time: {Session.SpeedtestResult.ElapsedMiliseconds / 1000:F1} s, Speed: {Session.SpeedtestResult.MaxMBsPerSecond:F1} MB/s ({Session.SpeedtestResult.MaxMBsPerSecond * 8:F0} mbit)");
                                }
                                catch (Exception ex)
                                {
                                    Logger.Error(ex, "Speedtest failed");
                                }
                            }
                            else
                            {
                                Logger.Warn($"Only a speedtest for HTTP(S), not '{Session.Root.Uri.Scheme}'");
                            }
                        }
                    }

                    Logger.Info("Logging sessions stats...");
                    try
                    {
                        string sessionStats = Statistics.GetSessionStats(Session, includeExtensions: true);
                        Logger.Info(sessionStats);
                        HistoryLogger.Info(sessionStats);
                        Logger.Info("Logged sessions stats");

                        if (!OpenDirectoryIndexerSettings.CommandLineOptions.NoReddit)
                        {
                            // Also log to screen, when saving links or JSON fails and the logs keep filling by other sessions, this will be saved
                            Console.WriteLine(sessionStats);
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex);
                    }

                    if (Session.UrlsWithErrors.Any())
                    {
                        Logger.Info("URLs with errors:");
                        Console.WriteLine("URLs with errors:");

                        foreach (string urlWithError in Session.UrlsWithErrors.OrderBy(u => u))
                        {
                            Logger.Info(urlWithError);
                            Console.WriteLine(urlWithError);
                        }
                    }

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Json)
                    {
                        Logger.Info("Save session to JSON");
                        Console.WriteLine("Save session to JSON");

                        try
                        {
                            Library.SaveSessionJson(Session);
                            Logger.Info($"Saved session: {PathHelper.GetValidPath(Session.Root.Url)}.json");
                            Console.WriteLine($"Saved session: {PathHelper.GetValidPath(Session.Root.Url)}.json");
                        }
                        catch (Exception ex)
                        {
                            Logger.Error(ex);
                        }
                    }

                    Logger.Info("Finished indexing!");
                    Console.WriteLine("Finished indexing!");

                    Program.SetConsoleTitle($"✔ {Program.ConsoleTitle}");

                    if (OpenDirectoryIndexerSettings.CommandLineOptions.Quit)
                    {
                        Command.KillApplication();
                    }
                    else
                    {
                        Console.WriteLine("Press ESC to exit! Or C to copy to clipboard and quit!");
                    }
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                }
                finally
                {
                    // Also stop the statistics timer when indexing failed, stopping an already stopped timer is harmless
                    TimerStatistics.Stop();
                }
            });
        }
コード例 #5
0
        /// <summary>
        /// Worker loop that takes directories from <paramref name="queue"/> and indexes them via FTP, Google Drive
        /// or HTTP(S), depending on the session root. The loop ends when cancellation is requested or when the
        /// queue is empty and no processor is busy anymore.
        /// </summary>
        /// <param name="queue">Queue with the directories that still have to be processed.</param>
        /// <param name="name">Name of this processor, used in log messages and as key in <c>WebDirectoryProcessorInfo</c>.</param>
        /// <param name="token">Token that is checked after every iteration to stop the loop.</param>
        /// <returns>A task that completes when the loop has ended.</returns>
        private async Task WebDirectoryProcessor(ConcurrentQueue <WebDirectory> queue, string name, CancellationToken token)
        {
            Logger.Debug($"Start [{name}]");

            do
            {
                // Counted as running while dequeuing/processing, the loop condition of all processors depends on this counter
                Interlocked.Increment(ref RunningWebDirectoryThreads);

                if (queue.TryDequeue(out WebDirectory webDirectory))
                {
                    try
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo[name] = webDirectory;
                        }

                        if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                        {
                            Session.ProcessedUrls.Add(webDirectory.Url);
                            Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                            if (Session.Root.Uri.Scheme == "ftp")
                            {
                                WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory);

                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                            }
                            else
                            if (Session.Root.Uri.Host == "drive.google.com")
                            {
                                string baseUrl = webDirectory.Url;

                                WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);

                                parsedWebDirectory.Url = baseUrl;

                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                            }
                            else
                            {
                                // Only follow URLs on the same host and below the root path
                                if (webDirectory.Uri.Host == Session.Root.Uri.Host && webDirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))
                                {
                                    Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                                    Session.TotalHttpRequests++;

                                    await RetryPolicy.ExecuteAsync(async() =>
                                    {
                                        webDirectory.StartTime = DateTimeOffset.UtcNow;

                                        HttpResponseMessage httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);
                                        string html = null;

                                        if (httpResponseMessage.IsSuccessStatusCode)
                                        {
                                            html = await GetHtml(httpResponseMessage);
                                        }

                                        // Retry with the Curl User-Agent when the first request failed or a successful response had no content
                                        if (FirstRequest && !httpResponseMessage.IsSuccessStatusCode || httpResponseMessage.IsSuccessStatusCode && string.IsNullOrWhiteSpace(html))
                                        {
                                            Logger.Warn("First request fails, using Curl fallback User-Agent");
                                            HttpClient.DefaultRequestHeaders.UserAgent.Clear();
                                            HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Curl);
                                            httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

                                            if (httpResponseMessage.IsSuccessStatusCode)
                                            {
                                                html = await GetHtml(httpResponseMessage);
                                                Logger.Warn("Yes, this Curl User-Agent did the trick!");
                                            }
                                        }

                                        // Same check again, now falling back to the Chrome User-Agent
                                        if (FirstRequest && !httpResponseMessage.IsSuccessStatusCode || httpResponseMessage.IsSuccessStatusCode && string.IsNullOrWhiteSpace(html))
                                        {
                                            Logger.Warn("First request fails, using Chrome fallback User-Agent");
                                            HttpClient.DefaultRequestHeaders.UserAgent.Clear();
                                            HttpClient.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgent_Chrome);
                                            httpResponseMessage = await HttpClient.GetAsync(webDirectory.Url);

                                            if (httpResponseMessage.IsSuccessStatusCode)
                                            {
                                                html = await GetHtml(httpResponseMessage);
                                                Logger.Warn("Yes, the Chrome User-Agent did the trick!");
                                            }
                                        }

                                        bool calibreDetected        = false;
                                        string calibreVersionString = string.Empty;

                                        if (httpResponseMessage.IsSuccessStatusCode)
                                        {
                                            FirstRequest = false;

                                            List <string> serverHeaders = new List <string>();

                                            if (httpResponseMessage.Headers.Contains("Server"))
                                            {
                                                serverHeaders = httpResponseMessage.Headers.GetValues("Server").ToList();

                                                calibreDetected = serverHeaders.Any(h => h.Contains("calibre"));
                                            }

                                            if (calibreDetected)
                                            {
                                                string serverHeader  = string.Join("/", serverHeaders);
                                                calibreVersionString = serverHeader;
                                            }
                                            else
                                            {
                                                if (html == null)
                                                {
                                                    html = await GetHtml(httpResponseMessage);
                                                }

                                                // UNTESTED (cannot find or down Calibre with this issue)
                                                const string calibreVersionIdentifier = "CALIBRE_VERSION = \"";
                                                calibreDetected = html?.Contains(calibreVersionIdentifier) == true;

                                                if (calibreDetected)
                                                {
                                                    // The version is the text between the quote that ends the identifier and the next quote.
                                                    // Substring takes a length, not an end index.
                                                    int versionStart = html.IndexOf(calibreVersionIdentifier, StringComparison.Ordinal) + calibreVersionIdentifier.Length;
                                                    int versionEnd   = html.IndexOf('"', versionStart);

                                                    if (versionEnd > versionStart)
                                                    {
                                                        calibreVersionString = html.Substring(versionStart, versionEnd - versionStart);
                                                    }
                                                    else
                                                    {
                                                        // No readable version, handle the response as a normal directory listing
                                                        calibreDetected = false;
                                                    }
                                                }
                                            }
                                        }

                                        if (calibreDetected)
                                        {
                                            Version calibreVersion = CalibreParser.ParseVersion(calibreVersionString);

                                            Console.WriteLine($"Calibre {calibreVersion} detected! I will index it at max 100 books per 30 seconds, else it will break Calibre...");
                                            Logger.Info($"Calibre {calibreVersion} detected! I will index it at max 100 books per 30 seconds, else it will break Calibre...");

                                            await CalibreParser.ParseCalibre(HttpClient, httpResponseMessage.RequestMessage.RequestUri, webDirectory, calibreVersion);

                                            // Ends only this retry delegate, Calibre is not parsed as HTML
                                            return;
                                        }

                                        Uri originalUri = new Uri(webDirectory.Url);
                                        Logger.Debug($"[{name}] Finish download '{webDirectory.Url}'");

                                        // Process only same site
                                        if (httpResponseMessage.RequestMessage.RequestUri.Host == Session.Root.Uri.Host)
                                        {
                                            int httpStatusCode = (int)httpResponseMessage.StatusCode;

                                            if (!Session.HttpStatusCodes.ContainsKey(httpStatusCode))
                                            {
                                                Session.HttpStatusCodes[httpStatusCode] = 0;
                                            }

                                            Session.HttpStatusCodes[httpStatusCode]++;

                                            if (httpResponseMessage.IsSuccessStatusCode)
                                            {
                                                if (html == null)
                                                {
                                                    html = await GetHtml(httpResponseMessage);
                                                }

                                                Session.TotalHttpTraffic += html.Length;

                                                WebDirectory parsedWebDirectory = await DirectoryParser.ParseHtml(webDirectory, html, HttpClient);
                                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                                            }
                                            else
                                            {
                                                Session.Errors++;
                                                webDirectory.Error = true;

                                                if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                                                {
                                                    Session.UrlsWithErrors.Add(webDirectory.Url);
                                                }

                                                // Throws, so the failure is reported to the retry policy
                                                httpResponseMessage.EnsureSuccessStatusCode();
                                            }
                                        }
                                        else
                                        {
                                            Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' which points to '{httpResponseMessage.RequestMessage.RequestUri}'");
                                            Session.Skipped++;
                                        }
                                    });
                                }
                                else
                                {
                                    Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");

                                    Session.Skipped++;
                                }
                            }

                            Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                        }
                    }
                    catch (Exception ex)
                    {
                        // ParentDirectory can be null (the root is created without a parent). An exception thrown here
                        // would leave the loop before the running counter below is decremented.
                        Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");

                        Session.Errors++;

                        if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                        {
                            Session.UrlsWithErrors.Add(webDirectory.Url);
                        }
                    }
                    finally
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo.Remove(name);
                        }
                    }
                }

                Interlocked.Decrement(ref RunningWebDirectoryThreads);

                // Needed!
                await Task.Delay(TimeSpan.FromMilliseconds(10));
            }while (!token.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0));

            Logger.Debug($"Finished [{name}]");
        }
コード例 #6
0
        /// <summary>
        /// Worker loop that takes directories from <paramref name="queue"/> and indexes them via FTP, Google Drive
        /// or HTTP(S), depending on the session root. The loop ends when cancellation is requested or when the
        /// queue is empty and no processor is busy anymore.
        /// </summary>
        /// <param name="queue">Queue with the directories that still have to be processed.</param>
        /// <param name="name">Name of this processor, used in log messages and as key in <c>WebDirectoryProcessorInfo</c>.</param>
        /// <param name="token">Token that is checked after every iteration to stop the loop.</param>
        /// <returns>A task that completes when the loop has ended.</returns>
        private async Task WebDirectoryProcessor(ConcurrentQueue <WebDirectory> queue, string name, CancellationToken token)
        {
            Logger.Debug($"Start [{name}]");

            do
            {
                // Counted as running while dequeuing/processing, the loop condition of all processors depends on this counter
                Interlocked.Increment(ref RunningWebDirectoryThreads);

                if (queue.TryDequeue(out WebDirectory webDirectory))
                {
                    try
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo[name] = webDirectory;
                        }

                        if (!Session.ProcessedUrls.Contains(webDirectory.Url))
                        {
                            Session.ProcessedUrls.Add(webDirectory.Url);
                            Logger.Info($"[{name}] Begin processing {webDirectory.Url}");

                            if (Session.Root.Uri.Scheme == "ftp")
                            {
                                WebDirectory parsedWebDirectory = await FtpParser.ParseFtpAsync(name, webDirectory);

                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                            }
                            else
                            if (Session.Root.Uri.Host == "drive.google.com")
                            {
                                string baseUrl = webDirectory.Url;

                                WebDirectory parsedWebDirectory = await GoogleDriveIndexer.IndexAsync(webDirectory);

                                parsedWebDirectory.Url = baseUrl;

                                AddProcessedWebDirectory(webDirectory, parsedWebDirectory);
                            }
                            else
                            {
                                // Only follow URLs on the same host and below the root path
                                if (webDirectory.Uri.Host == Session.Root.Uri.Host && webDirectory.Uri.LocalPath.StartsWith(Session.Root.Uri.LocalPath))
                                {
                                    Logger.Debug($"[{name}] Start download '{webDirectory.Url}'");
                                    Session.TotalHttpRequests++;

                                    // The context carries the processor name and directory along with the execution;
                                    // the executed delegate itself does not read it
                                    Context pollyContext = new Context();
                                    pollyContext.Add("Processor", name);
                                    pollyContext.Add("WebDirectory", webDirectory);
                                    await RetryPolicy.ExecuteAsync(ctx => ProcessWebDirectoryAsync(name, webDirectory), pollyContext);
                                }
                                else
                                {
                                    Logger.Warn($"[{name}] Skipped result of '{webDirectory.Url}' because it is not the same host or path");

                                    Session.Skipped++;
                                }
                            }

                            Logger.Info($"[{name}] Finished processing {webDirectory.Url}");
                        }
                        else
                        {
                            Logger.Warn($"[{name}] Skip, already processed: {webDirectory.Uri}");
                        }
                    }
                    catch (Exception ex)
                    {
                        // ParentDirectory can be null (the root is created without a parent). An exception thrown here
                        // would leave the loop before the running counter below is decremented.
                        Logger.Error(ex, $"Error processing Url: '{webDirectory.Url}' from parent '{webDirectory.ParentDirectory?.Url}'");

                        Session.Errors++;

                        if (!Session.UrlsWithErrors.Contains(webDirectory.Url))
                        {
                            Session.UrlsWithErrors.Add(webDirectory.Url);
                        }
                    }
                    finally
                    {
                        lock (WebDirectoryProcessorInfoLock)
                        {
                            WebDirectoryProcessorInfo.Remove(name);
                        }
                    }
                }

                Interlocked.Decrement(ref RunningWebDirectoryThreads);

                // Needed!
                await Task.Delay(TimeSpan.FromMilliseconds(10));
            }while (!token.IsCancellationRequested && (!queue.IsEmpty || RunningWebDirectoryThreads > 0));

            Logger.Debug($"Finished [{name}]");
        }