예제 #1
0
    /// <summary>
    /// Entry point for indexing a listing handled by this parser: resolves the root id, performs a
    /// one-time password probe for the session, and then scans <paramref name="webDirectory"/>.
    /// </summary>
    /// <param name="httpClient">Client used to POST to the index endpoint.</param>
    /// <param name="webDirectory">Directory to index.</param>
    /// <param name="html">Page HTML; passed to <c>GetRootId</c> when the session has no stored root id yet.</param>
    /// <returns>
    /// The directory returned by <c>ScanIndexAsync</c>, or the unchanged <paramref name="webDirectory"/>
    /// when the password probe does not yield a usable response.
    /// </returns>
    /// <remarks>
    /// Any exception is logged, recorded on the directory and the session, and then rethrown.
    /// </remarks>
    public static async Task<WebDirectory> ParseIndex(HttpClient httpClient, WebDirectory webDirectory, string html)
    {
        try
        {
            OpenDirectoryIndexer.Session.GDIndex = true;

            string rootId;

            if (OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_GdIndex_RootId))
            {
                rootId = OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId];
            }
            else
            {
                // First visit in this session: derive the root id from the page and remember it.
                rootId = GetRootId(html);
                OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId] = rootId;
            }

            // The password entry doubles as the "probe already done" marker for the session.
            if (OpenDirectoryIndexer.Session.Parameters.ContainsKey(Constants.Parameters_Password))
            {
                webDirectory = await ScanIndexAsync(httpClient, webDirectory);

                return webDirectory;
            }

            Console.WriteLine($"{Parser} will always be indexed at a maximum rate of 1 per second, else you will run into problems and errors.");
            Logger.Info($"{Parser} will always be indexed at a maximum rate of 1 per second, else you will run into problems and errors.");

            Console.WriteLine("Check if password is needed...");
            Logger.Info("Check if password is needed...");
            OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = null;

            string requestUrl = $"{webDirectory.Uri}?rootId={rootId}";

            GdIndexResponse indexResponse = null;
            bool probeSucceeded;

            // Probe without a body; a successful status with an unparsable response is treated as
            // "password required" below.
            using (HttpResponseMessage probeResponse = await httpClient.PostAsync(requestUrl, null))
            {
                probeSucceeded = probeResponse.IsSuccessStatusCode;

                if (probeSucceeded)
                {
                    string probeJson = await probeResponse.Content.ReadAsStringAsync();

                    indexResponse = GdIndexResponse.FromJson(probeJson);
                }
            }

            if (probeSucceeded && indexResponse == null)
            {
                Console.WriteLine("Directory is password protected, please enter password:");
                Logger.Info("Directory is password protected, please enter password.");

                // Store under the same key that is read back below and sent in the request body.
                OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] = Console.ReadLine();

                // NOTE(review): the password is echoed to the console and written to the log in clear text.
                Console.WriteLine($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");
                Logger.Info($"Using password: {OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password]}");

                string requestBody = JsonConvert.SerializeObject(new Dictionary<string, object>
                {
                    { "page_index", 0 },
                    { "page_token", null },
                    { "password", OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password] },
                    { "q", "" }
                });

                using (HttpResponseMessage passwordResponse = await httpClient.PostAsync(requestUrl, new StringContent(requestBody)))
                {
                    if (passwordResponse.IsSuccessStatusCode)
                    {
                        string passwordJson = await passwordResponse.Content.ReadAsStringAsync();

                        indexResponse = GdIndexResponse.FromJson(passwordJson);
                    }
                }
            }

            if (indexResponse != null)
            {
                Console.WriteLine("Password OK!");
                Logger.Info("Password OK!");

                webDirectory = await ScanIndexAsync(httpClient, webDirectory);
            }
            else
            {
                // Drop the marker so a later call runs the probe again.
                OpenDirectoryIndexer.Session.Parameters.Remove(Constants.Parameters_Password);
                Console.WriteLine($"Error. Stopping.");
                Logger.Error($"Error. Stopping.");
            }
        }
        catch (Exception ex)
        {
            RateLimiter.AddDelay(TimeSpan.FromSeconds(5));
            Logger.Error(ex, $"Error parsing {Parser} for URL: {webDirectory.Url}");
            webDirectory.Error = true;

            OpenDirectoryIndexer.Session.Errors++;

            if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
            {
                OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
            }

            throw;
        }

        return webDirectory;
    }
예제 #2
0
    /// <summary>
    /// Requests the listing for <paramref name="webDirectory"/> from the index endpoint and adds the
    /// returned entries to its subdirectories and files, running the request through the retry
    /// policy obtained from <c>Library.GetAsyncRetryPolicy</c>.
    /// </summary>
    /// <param name="httpClient">Client used to POST the listing request.</param>
    /// <param name="webDirectory">Directory to populate; a trailing slash is appended to its <c>Url</c> when missing.</param>
    /// <returns>
    /// The same <paramref name="webDirectory"/> instance. Failures are not thrown to the caller: they
    /// are logged and recorded on the directory and on the session.
    /// </returns>
    private static async Task<WebDirectory> ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory)
    {
        webDirectory.Parser = Parser;

        try
        {
            Polly.Retry.AsyncRetryPolicy asyncRetryPolicy = Library.GetAsyncRetryPolicy((ex, waitTimeSpan, retry, pollyContext) =>
            {
                Logger.Warn($"Error retrieving directory listing for {webDirectory.Uri}, waiting {waitTimeSpan.TotalSeconds} seconds.. Error: {ex.Message}");
                RateLimiter.AddDelay(waitTimeSpan);
            }, 8);

            // ExecuteAsync rather than ExecuteAndCaptureAsync: the latter stores the final exception in
            // a PolicyResult, and since that result was never inspected, failures were silently dropped
            // and the catch block below never recorded them.
            await asyncRetryPolicy.ExecuteAsync(async () =>
            {
                await RateLimiter.RateLimit();

                if (!webDirectory.Url.EndsWith("/", StringComparison.Ordinal))
                {
                    webDirectory.Url += "/";
                }

                Logger.Warn($"Retrieving listings for {webDirectory.Uri}");

                // Only the path relative to the session root is escaped; the root URL is sent as-is.
                string rootUrl = OpenDirectoryIndexer.Session.Root.Url;
                string relativePath = webDirectory.Url.Replace(rootUrl, string.Empty).TrimEnd('/');
                string requestUrl = $"{rootUrl}{Uri.EscapeDataString(relativePath)}/?rootId={OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId]}";

                using (HttpResponseMessage httpResponseMessage = await httpClient.PostAsync(requestUrl, null))
                {
                    webDirectory.ParsedSuccessfully = httpResponseMessage.IsSuccessStatusCode;
                    httpResponseMessage.EnsureSuccessStatusCode();

                    string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();

                    GdIndexResponse indexResponse = GdIndexResponse.FromJson(responseJson);

                    webDirectory.ParsedSuccessfully = indexResponse != null;

                    if (indexResponse == null)
                    {
                        // Fail with a descriptive error instead of a NullReferenceException in the loop below.
                        throw new InvalidOperationException($"No valid index response received for {webDirectory.Url}");
                    }

                    foreach (File file in indexResponse.Files)
                    {
                        if (file.MimeType == FolderMimeType)
                        {
                            webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                            {
                                Parser = Parser,
                                // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
                                Url = $"{webDirectory.Uri}{file.Name}/",
                                Name = file.Name
                            });
                        }
                        else
                        {
                            webDirectory.Files.Add(new WebFile
                            {
                                Url = new Uri(webDirectory.Uri, file.Name).ToString(),
                                FileName = file.Name,
                                FileSize = file.Size
                            });
                        }
                    }
                }
            });
        }
        catch (Exception ex)
        {
            Logger.Error(ex, $"Error retrieving directory listing for {webDirectory.Url}");
            webDirectory.Error = true;

            OpenDirectoryIndexer.Session.Errors++;

            if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
            {
                OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
            }

            // Deliberately not rethrown: the failure is recorded above and the directory is still returned.
        }

        return webDirectory;
    }