/// <summary>
/// Retrieves the paged listing for <paramref name="webDirectory"/> by POSTing form-encoded
/// paging parameters to its URL, and adds every returned entry to
/// <c>webDirectory.Subdirectories</c> or <c>webDirectory.Files</c>.
/// </summary>
/// <param name="htmlDocument">Passed through unchanged to <c>DecodeResponse</c>.</param>
/// <param name="httpClient">Client used to send the POST requests; also passed to <c>DecodeResponse</c>.</param>
/// <param name="webDirectory">Directory to scan. Its <c>Parser</c>, <c>Url</c> (trailing slash),
/// <c>ParsedSuccessfully</c>, <c>Subdirectories</c>, <c>Files</c> and <c>Error</c> members are updated in place.</param>
/// <returns>
/// The same <paramref name="webDirectory"/> instance. Failures are not rethrown: they are logged,
/// <c>webDirectory.Error</c> is set and the session error counters are updated.
/// </returns>
private static async Task<WebDirectory> ScanAsync(IHtmlDocument htmlDocument, HttpClient httpClient, WebDirectory webDirectory)
{
    webDirectory.Parser = Parser;

    try
    {
        // NOTE(review): the second argument (8) is presumably the retry count - confirm against Library.GetAsyncRetryPolicy.
        Polly.Retry.AsyncRetryPolicy asyncRetryPolicy = Library.GetAsyncRetryPolicy((ex, waitTimeSpan, retry, pollyContext) =>
        {
            Logger.Warn($"Error retrieving directory listing for {webDirectory.Uri}, waiting {waitTimeSpan.TotalSeconds} seconds.. Error: {ex.Message}");
            RateLimiter.AddDelay(waitTimeSpan);
        }, 8);

        if (!webDirectory.Url.EndsWith("/", StringComparison.Ordinal))
        {
            webDirectory.Url += "/";
        }

        long pageIndex = 0;
        string nextPageToken = string.Empty;

        do
        {
            // ExecuteAsync (instead of ExecuteAndCaptureAsync) lets the final failure propagate to the
            // outer catch. With the captured variant the failure was silently dropped, and a page that
            // kept failing left nextPageToken non-empty, so this loop never terminated.
            await asyncRetryPolicy.ExecuteAsync(async () =>
            {
                await RateLimiter.RateLimit();

                string password = OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password];

                // NOTE(review): this writes the password to the log in clear text.
                Logger.Warn($"Retrieving listings for {webDirectory.Uri.PathAndQuery}, page {pageIndex + 1}{(!string.IsNullOrWhiteSpace(password) ? $" with password: {password}" : string.Empty)}");

                Dictionary<string, string> postValues = new Dictionary<string, string>
                {
                    { "password", password },
                    { "page_token", nextPageToken },
                    { "page_index", pageIndex.ToString() }
                };

                // Dispose request and response so their content buffers/connections are released per page.
                using (HttpRequestMessage httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, webDirectory.Uri) { Content = new FormUrlEncodedContent(postValues) })
                using (HttpResponseMessage httpResponseMessage = await httpClient.SendAsync(httpRequestMessage))
                {
                    webDirectory.ParsedSuccessfully = httpResponseMessage.IsSuccessStatusCode;
                    httpResponseMessage.EnsureSuccessStatusCode();

                    try
                    {
                        string responseJson = await DecodeResponse(htmlDocument, httpClient, httpResponseMessage);

                        BhadooIndexResponse indexResponse = BhadooIndexResponse.FromJson(responseJson);

                        webDirectory.ParsedSuccessfully = indexResponse.Data?.Error == null;

                        if (indexResponse.Data?.Error?.Message == "Rate Limit Exceeded")
                        {
                            throw new Exception("Rate limit exceeded");
                        }

                        // Also covers a response without a Data object at all.
                        if (indexResponse.Data?.Files == null)
                        {
                            throw new Exception("Directory listing retrieval error (Files null)");
                        }

                        nextPageToken = indexResponse.NextPageToken;
                        pageIndex = indexResponse.CurPageIndex + 1;

                        foreach (File file in indexResponse.Data.Files)
                        {
                            if (file.MimeType == FolderMimeType)
                            {
                                webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                                {
                                    Parser = Parser,
                                    // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
                                    Url = $"{webDirectory.Uri}{GetSafeName(file.Name)}/",
                                    Name = file.Name
                                });
                            }
                            else
                            {
                                webDirectory.Files.Add(new WebFile
                                {
                                    Url = new Uri(webDirectory.Uri, GetSafeName(file.Name)).ToString(),
                                    FileName = file.Name,
                                    FileSize = file.Size
                                });
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Keep the original exception as InnerException so its stack trace is not lost.
                        throw new Exception($"Retrieving listings for {webDirectory.Uri.PathAndQuery}, page {pageIndex + 1}. Error: {ex.Message}", ex);
                    }
                }
            });
        } while (!string.IsNullOrWhiteSpace(nextPageToken));
    }
    catch (Exception ex)
    {
        Logger.Error(ex, $"Error retrieving directory listing for {webDirectory.Url}");
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }

        // Deliberately not rethrown: the caller receives the directory flagged with Error = true.
    }

    return webDirectory;
}
/// <summary>
/// Retrieves the paged listing for <paramref name="webDirectory"/> by POSTing a JSON body with
/// paging parameters to its URL, and adds every returned entry to
/// <c>webDirectory.Subdirectories</c> or <c>webDirectory.Files</c>.
/// </summary>
/// <param name="httpClient">Client used to send the POST requests.</param>
/// <param name="webDirectory">Directory to scan. Its <c>Parser</c>, <c>Url</c> (trailing slash),
/// <c>ParsedSuccessfully</c>, <c>Subdirectories</c>, <c>Files</c> and <c>Error</c> members are updated in place.</param>
/// <returns>
/// The same <paramref name="webDirectory"/> instance. Failures are not rethrown: they are logged,
/// <c>webDirectory.Error</c> is set and the session error counters are updated.
/// </returns>
private static async Task<WebDirectory> ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory)
{
    webDirectory.Parser = Parser;

    try
    {
        // NOTE(review): the second argument (8) is presumably the retry count - confirm against Library.GetAsyncRetryPolicy.
        Polly.Retry.AsyncRetryPolicy asyncRetryPolicy = Library.GetAsyncRetryPolicy((ex, waitTimeSpan, retry, pollyContext) =>
        {
            Logger.Warn($"Error retrieving directory listing for {webDirectory.Uri}, waiting {waitTimeSpan.TotalSeconds} seconds.. Error: {ex.Message}");
            RateLimiter.AddDelay(waitTimeSpan);
        }, 8);

        if (!webDirectory.Url.EndsWith("/", StringComparison.Ordinal))
        {
            webDirectory.Url += "/";
        }

        long pageIndex = 0;
        string nextPageToken = null;

        do
        {
            // ExecuteAsync (instead of ExecuteAndCaptureAsync) lets the final failure propagate to the
            // outer catch. With the captured variant the failure was silently dropped, and a page that
            // kept failing left nextPageToken non-empty, so this loop never terminated.
            await asyncRetryPolicy.ExecuteAsync(async () =>
            {
                await RateLimiter.RateLimit();

                string password = OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_Password];

                // NOTE(review): this writes the password to the log in clear text.
                Logger.Warn($"Retrieving listings for {webDirectory.Uri.PathAndQuery}, page {pageIndex + 1}{(!string.IsNullOrWhiteSpace(password) ? $" with password: {password}" : string.Empty)}");

                string requestJson = JsonConvert.SerializeObject(new Dictionary<string, object>
                {
                    { "page_index", pageIndex },
                    { "page_token", nextPageToken },
                    { "password", password },
                    { "q", "" }
                });

                // Dispose the response so its content buffer/connection is released per page.
                using (HttpResponseMessage httpResponseMessage = await httpClient.PostAsync(webDirectory.Uri, new StringContent(requestJson)))
                {
                    webDirectory.ParsedSuccessfully = httpResponseMessage.IsSuccessStatusCode;
                    httpResponseMessage.EnsureSuccessStatusCode();

                    string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();

                    Go2IndexResponse indexResponse = Go2IndexResponse.FromJson(responseJson);

                    webDirectory.ParsedSuccessfully = indexResponse.Error == null;

                    if (indexResponse.Error != null)
                    {
                        throw new Exception($"{indexResponse.Error.Code} | {indexResponse.Error.Message}");
                    }

                    if (indexResponse.Data?.Error != null)
                    {
                        if (indexResponse.Data.Error?.Message == "Rate Limit Exceeded")
                        {
                            throw new Exception("Rate limit exceeded");
                        }

                        throw new Exception($"Error in response: {indexResponse.Data.Error?.Code} | {indexResponse.Data.Error?.Message}");
                    }

                    // Data is treated as nullable above; fail with a descriptive error instead of a
                    // NullReferenceException when it (or its Files list) is missing.
                    if (indexResponse.Data?.Files == null)
                    {
                        throw new Exception("Directory listing retrieval error (Files null)");
                    }

                    nextPageToken = indexResponse.NextPageToken;
                    pageIndex = indexResponse.CurPageIndex + 1;

                    foreach (File file in indexResponse.Data.Files)
                    {
                        if (file.MimeType == FolderMimeType)
                        {
                            webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                            {
                                Parser = Parser,
                                // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
                                Url = $"{webDirectory.Uri}{Uri.EscapeDataString(file.Name)}/",
                                Name = file.Name
                            });
                        }
                        else
                        {
                            webDirectory.Files.Add(new WebFile
                            {
                                Url = new Uri(webDirectory.Uri, Uri.EscapeDataString(file.Name)).ToString(),
                                FileName = file.Name,
                                FileSize = file.Size
                            });
                        }
                    }
                }
            });
        } while (!string.IsNullOrWhiteSpace(nextPageToken));
    }
    catch (Exception ex)
    {
        Logger.Error(ex, $"Error retrieving directory listing for {webDirectory.Url}");
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }

        // Deliberately not rethrown: the caller receives the directory flagged with Error = true.
    }

    return webDirectory;
}
/// <summary>
/// Retrieves the listing for <paramref name="webDirectory"/> with a single body-less POST to a URL
/// built from the session root URL, the escaped path relative to that root and the configured
/// <c>rootId</c> parameter, and adds every returned entry to <c>webDirectory.Subdirectories</c>
/// or <c>webDirectory.Files</c>.
/// </summary>
/// <param name="httpClient">Client used to send the POST request.</param>
/// <param name="webDirectory">Directory to scan. Its <c>Parser</c>, <c>Url</c> (trailing slash),
/// <c>ParsedSuccessfully</c>, <c>Subdirectories</c>, <c>Files</c> and <c>Error</c> members are updated in place.</param>
/// <returns>
/// The same <paramref name="webDirectory"/> instance. Failures are not rethrown: they are logged,
/// <c>webDirectory.Error</c> is set and the session error counters are updated.
/// </returns>
private static async Task<WebDirectory> ScanIndexAsync(HttpClient httpClient, WebDirectory webDirectory)
{
    webDirectory.Parser = Parser;

    try
    {
        // NOTE(review): the second argument (8) is presumably the retry count - confirm against Library.GetAsyncRetryPolicy.
        Polly.Retry.AsyncRetryPolicy asyncRetryPolicy = Library.GetAsyncRetryPolicy((ex, waitTimeSpan, retry, pollyContext) =>
        {
            Logger.Warn($"Error retrieving directory listing for {webDirectory.Uri}, waiting {waitTimeSpan.TotalSeconds} seconds.. Error: {ex.Message}");
            RateLimiter.AddDelay(waitTimeSpan);
        }, 8);

        // ExecuteAsync (instead of ExecuteAndCaptureAsync) lets the final failure propagate to the
        // catch below; with the captured variant a failed request was silently dropped and the
        // directory was never flagged as erroneous.
        await asyncRetryPolicy.ExecuteAsync(async () =>
        {
            await RateLimiter.RateLimit();

            if (!webDirectory.Url.EndsWith("/", StringComparison.Ordinal))
            {
                webDirectory.Url += "/";
            }

            Logger.Warn($"Retrieving listings for {webDirectory.Uri}");

            // The path relative to the session root is escaped as one data string (slashes included).
            string rootUrl = OpenDirectoryIndexer.Session.Root.Url;
            string relativePath = webDirectory.Url.Replace(rootUrl, string.Empty).TrimEnd('/');
            string requestUrl = $"{rootUrl}{Uri.EscapeDataString(relativePath)}/?rootId={OpenDirectoryIndexer.Session.Parameters[Constants.Parameters_GdIndex_RootId]}";

            // Dispose the response so its content buffer/connection is released.
            using (HttpResponseMessage httpResponseMessage = await httpClient.PostAsync(requestUrl, null))
            {
                webDirectory.ParsedSuccessfully = httpResponseMessage.IsSuccessStatusCode;
                httpResponseMessage.EnsureSuccessStatusCode();

                string responseJson = await httpResponseMessage.Content.ReadAsStringAsync();

                GdIndexResponse indexResponse = GdIndexResponse.FromJson(responseJson);

                webDirectory.ParsedSuccessfully = indexResponse != null;

                // Fail with a descriptive error instead of a NullReferenceException when the
                // response could not be parsed or carries no file list.
                if (indexResponse?.Files == null)
                {
                    throw new Exception("Directory listing retrieval error (Files null)");
                }

                foreach (File file in indexResponse.Files)
                {
                    if (file.MimeType == FolderMimeType)
                    {
                        webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                        {
                            Parser = Parser,
                            // Yes, string concatenation, do not use new Uri(webDirectory.Uri, file.Name), because things could end with a space...
                            Url = $"{webDirectory.Uri}{file.Name}/",
                            Name = file.Name
                        });
                    }
                    else
                    {
                        webDirectory.Files.Add(new WebFile
                        {
                            Url = new Uri(webDirectory.Uri, file.Name).ToString(),
                            FileName = file.Name,
                            FileSize = file.Size
                        });
                    }
                }
            }
        });
    }
    catch (Exception ex)
    {
        Logger.Error(ex, $"Error retrieving directory listing for {webDirectory.Url}");
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }

        // Deliberately not rethrown: the caller receives the directory flagged with Error = true.
    }

    return webDirectory;
}