/// <summary>
/// Logs in the current Reddit instance.
/// </summary>
/// <param name="username">The username of the user to log on to.</param>
/// <param name="password">The password of the user to log on to.</param>
/// <param name="useSsl">Whether to use SSL or not. (default: true)</param>
/// <returns>The authenticated user.</returns>
/// <exception cref="AuthenticationException">Thrown when the login request fails; the underlying failure is preserved as the inner exception.</exception>
public AuthenticatedUser LogIn(string username, string password, bool useSsl = true)
{
    // Mono's certificate store is typically empty, which makes every SSL request fail
    // validation; accept all certificates when running on Mono.
    if (Type.GetType("Mono.Runtime") != null)
    {
        ServicePointManager.ServerCertificateValidationCallback = (s, c, ch, ssl) => true;
    }

    // Start a fresh session — reddit's cookie-based login must not reuse stale cookies.
    _webAgent.Cookies = new CookieContainer();

    var request = _webAgent.CreatePost(useSsl ? SslLoginUrl : LoginUrl);

    // Dispose the request stream even if writing the body throws (the original only
    // closed it on the success path). The non-SSL endpoint additionally requires "op".
    using (var stream = request.GetRequestStream())
    {
        if (useSsl)
        {
            _webAgent.WritePostBody(stream, new
            {
                user = username,
                passwd = password,
                api_type = "json"
            });
        }
        else
        {
            _webAgent.WritePostBody(stream, new
            {
                user = username,
                passwd = password,
                api_type = "json",
                op = "login"
            });
        }
    }

    try
    {
        _webAgent.ExecuteRequest(request);
    }
    catch (Exception ex)
    {
        // FIX: the original bare catch discarded the cause entirely; keep it as the
        // inner exception so callers can diagnose network vs. credential failures.
        throw new AuthenticationException("Incorrect login.", ex);
    }

    InitOrUpdateUser();
    return User;
}
/// <summary>
/// Gets the OAuth token for the user associated with the provided code.
/// </summary>
/// <param name="code">Sent by reddit as a parameter in the return uri; when <paramref name="isRefresh"/> is true, this is the refresh token instead.</param>
/// <param name="isRefresh">Set to true for refresh requests.</param>
/// <returns>The access token.</returns>
/// <exception cref="AuthenticationException">Thrown when reddit's response contains no access token.</exception>
public string GetOAuthToken(string code, bool isRefresh = false)
{
    // Mono's certificate store is typically empty, which makes every SSL request fail
    // validation; accept all certificates when running on Mono.
    if (Type.GetType("Mono.Runtime") != null)
    {
        ServicePointManager.ServerCertificateValidationCallback = (s, c, ch, ssl) => true;
    }

    _webAgent.Cookies = new CookieContainer();
    var request = _webAgent.CreatePost(AccessUrl);
    request.InitWebReqProxy();

    // HTTP Basic auth with the app's client credentials.
    // NOTE(review): Encoding.Default is platform-dependent; RFC 7617 recommends UTF-8.
    // Left as-is to avoid changing behavior for existing credentials — confirm and switch.
    request.Headers["Authorization"] = "Basic " + Convert.ToBase64String(
        Encoding.Default.GetBytes(_clientId + ":" + _clientSecret));

    // FIX: dispose the request stream even if writing the body throws
    // (the original only called Close on the success path).
    using (var stream = request.GetRequestStream())
    {
        if (isRefresh)
        {
            _webAgent.WritePostBody(stream, new
            {
                grant_type = "refresh_token",
                refresh_token = code
            });
        }
        else
        {
            _webAgent.WritePostBody(stream, new
            {
                grant_type = "authorization_code",
                code,
                redirect_uri = _redirectUri
            });
        }
    }

    var json = _webAgent.ExecuteRequest(request);
    if (json["access_token"] != null)
    {
        if (json["refresh_token"] != null)
        {
            RefreshToken = json["refresh_token"].ToString();
        }
        // Convert once and reuse instead of calling ToString() twice.
        OAuthToken = json["access_token"].ToString();
        return OAuthToken;
    }
    throw new AuthenticationException("Could not log in.");
}
/// <summary>
/// Downloads and parses a domain's robots.txt, returning the "Disallow" rules that
/// apply to this agent (either via its agent name or the "*" wildcard group).
/// </summary>
/// <param name="webAgent">Agent used to perform the request; its <c>AgentName</c> selects the applicable rule groups.</param>
/// <param name="domain">Domain (without scheme) whose robots.txt should be fetched.</param>
/// <returns>The disallowed url rules; empty when the file cannot be fetched.</returns>
public static async Task<IEnumerable<string>> GetDisallowedUrls(IWebAgent webAgent, string domain)
{
    var uri = new Uri($"http://{domain}/robots.txt");
    var list = new List<string>();
    var text = "";
    using (var response = await webAgent.ExecuteRequest(uri))
    {
        // BUG FIX: the original condition used "||", which is true for EVERY status
        // code, so the method always returned an empty list and robots.txt was never
        // honored. Only error statuses (4xx/5xx) should bail out.
        if ((int)response.StatusCode >= 400 && (int)response.StatusCode <= 599)
        {
            return list;
        }
        using (var stream = webAgent.GetCompressedStream(response))
        using (var reader = new StreamReader(stream, Encoding.Default))
        {
            text = reader.ReadToEnd();
        }
    }

    var lines = text.ToLower().Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    var name = webAgent.AgentName.ToLower();
    var applicable = false;
    foreach (var line in lines)
    {
        // A "user-agent" line starts a rule group; subsequent Disallow lines apply
        // while the current group matches us (by name or wildcard).
        if (line.Contains("user-agent"))
        {
            applicable = line.Contains("*") || line.Contains(name);
        }
        if (line.Contains("disallow") && applicable)
        {
            var split = line.Split(':');
            if (split.Length < 2)
            {
                // "Disallow:" with no value means "allow everything" — skip it.
                continue;
            }
            var rule = split[1].Trim();
            list.Add(rule);
        }
    }
    return list;
}
/// <summary>
/// Fetches and decodes the ToolBox usernotes wiki page for a subreddit.
/// </summary>
/// <param name="webAgent">Agent used to request the usernotes wiki page.</param>
/// <param name="subName">Subreddit whose usernotes should be loaded.</param>
/// <returns>All usernotes stored on the subreddit's ToolBox wiki page.</returns>
/// <exception cref="ToolBoxUserNotesException">Thrown for unsupported schema versions or any failure while decoding the notes blob.</exception>
public static IEnumerable<TBUserNote> GetUserNotes(IWebAgent webAgent, string subName)
{
    var wikiRequest = webAgent.CreateGet(String.Format(ToolBoxUserNotesWiki, subName));
    var wikiResponse = webAgent.ExecuteRequest(wikiRequest);
    var pageJson = JObject.Parse(wikiResponse["data"]["content_md"].Value<string>());

    int schemaVersion = pageJson["ver"].Value<int>();
    // Moderator names and warning types are stored once and referenced by index from each note.
    string[] moderators = pageJson["constants"]["users"].Values<string>().ToArray();
    string[] warningTypes = pageJson["constants"]["warnings"].Values<string>().ToArray();

    if (schemaVersion < 6)
        throw new ToolBoxUserNotesException("Unsupported ToolBox version");

    try
    {
        // The notes payload is base64-encoded, zlib-deflated JSON.
        var blobBytes = Convert.FromBase64String(pageJson["blob"].Value<string>());
        string decodedJson;
        using (var blobBuffer = new System.IO.MemoryStream(blobBytes))
        {
            // Skip the two-byte zlib header so DeflateStream can read the raw deflate data.
            blobBuffer.ReadByte();
            blobBuffer.ReadByte();
            using (var inflater = new DeflateStream(blobBuffer, CompressionMode.Decompress))
            using (var textReader = new System.IO.StreamReader(inflater))
            {
                decodedJson = textReader.ReadToEnd();
            }
        }

        var notesByUser = JObject.Parse(decodedJson);
        var allNotes = new List<TBUserNote>();
        foreach (KeyValuePair<string, JToken> entry in notesByUser)
        {
            foreach (JToken rawNote in entry.Value["ns"].Children())
            {
                var parsed = new TBUserNote
                {
                    AppliesToUsername = entry.Key,
                    SubName = subName,
                    SubmitterIndex = rawNote["m"].Value<int>(),
                    NoteTypeIndex = rawNote["w"].Value<int>(),
                    Message = rawNote["n"].Value<string>(),
                    Timestamp = UnixTimeStamp.UnixTimeStampToDateTime(rawNote["t"].Value<long>()),
                    Url = UnsquashLink(subName, rawNote["l"].ValueOrDefault<string>())
                };
                // Resolve the index references into human-readable values.
                parsed.Submitter = moderators[parsed.SubmitterIndex];
                parsed.NoteType = warningTypes[parsed.NoteTypeIndex];
                allNotes.Add(parsed);
            }
        }
        return allNotes;
    }
    catch (Exception e)
    {
        throw new ToolBoxUserNotesException("An error occured while processing Usernotes wiki. See inner exception for details", e);
    }
}
/// <summary>
/// Fetches and decodes the ToolBox usernotes wiki page for a subreddit.
/// </summary>
/// <param name="webAgent">Agent used to request the usernotes wiki page.</param>
/// <param name="subName">Subreddit whose usernotes should be loaded.</param>
/// <returns>All usernotes stored on the subreddit's ToolBox wiki page.</returns>
/// <exception cref="ToolBoxUserNotesException">Thrown for unsupported schema versions or any failure while decoding the notes blob.</exception>
public static IEnumerable <TBUserNote> GetUserNotes(IWebAgent webAgent, string subName)
{
    var request = webAgent.CreateGet(string.Format(ToolBoxUserNotesWiki, subName));
    var reqResponse = webAgent.ExecuteRequest(request);
    var response = JObject.Parse(reqResponse["data"]["content_md"].Value <string>());

    int version = response["ver"].Value <int>();
    // Moderator names and warning types are stored once and referenced by index from each note.
    string[] mods = response["constants"]["users"].Values <string>().ToArray();
    string[] warnings = response["constants"]["warnings"].Values <string>().ToArray();

    if (version < 6)
    {
        throw new ToolBoxUserNotesException("Unsupported ToolBox version");
    }

    try
    {
        // The notes payload is base64-encoded, zlib-deflated JSON.
        var data = Convert.FromBase64String(response["blob"].Value <string>());
        string uncompressed;
        using (System.IO.MemoryStream compressedStream = new System.IO.MemoryStream(data))
        {
            // Skip the two-byte zlib header so DeflateStream can read the raw deflate data.
            compressedStream.ReadByte();
            compressedStream.ReadByte();
            using (DeflateStream blobStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
            {
                using (var decompressedReader = new System.IO.StreamReader(blobStream))
                {
                    uncompressed = decompressedReader.ReadToEnd();
                }
            }
        }

        JObject users = JObject.Parse(uncompressed);
        List <TBUserNote> toReturn = new List <TBUserNote>();
        foreach (KeyValuePair <string, JToken> user in users)
        {
            var x = user.Value;
            foreach (JToken note in x["ns"].Children())
            {
                TBUserNote uNote = new TBUserNote();
                uNote.AppliesToUsername = user.Key;
                uNote.SubName = subName;
                uNote.SubmitterIndex = note["m"].Value <int>();
                uNote.Submitter = mods[uNote.SubmitterIndex];
                uNote.NoteTypeIndex = note["w"].Value <int>();
                uNote.NoteType = warnings[uNote.NoteTypeIndex];
                uNote.Message = note["n"].Value <string>();
                uNote.Timestamp = UnixTimeStamp.UnixTimeStampToDateTime(note["t"].Value <long>());
                uNote.Url = UnsquashLink(subName, note["l"].ValueOrDefault <string>());
                toReturn.Add(uNote);
            }
        }
        return toReturn;
    }
    catch (Exception e)
    {
        // FIX: the exception message literal was previously broken across a raw
        // newline in the source, which does not compile in C#; rejoined into one literal.
        throw new ToolBoxUserNotesException("An error occured while processing Usernotes wiki. See inner exception for details", e);
    }
}
/// <summary>
/// Main loop for a single crawl worker: repeatedly pulls the next uri from the shared
/// queue, fetches it, extracts links/data, and enqueues newly discovered links, until a
/// completion condition is met, cancellation is requested, or the queue stays empty.
/// </summary>
/// <param name="worker">The worker running this loop; worker 0 also drives progress updates.</param>
/// <param name="job">The crawl job supplying domain, regex, and enqueue/completion conditions.</param>
private async Task ThreadAction(IWorker worker, CrawlJob job)
{
    // sort out multi threading holding pattern: non-primary workers wait until the
    // queue has grown enough to feed them (at least Id + 1 items), so they don't all
    // fight over a nearly-empty queue at startup.
    // NOTE(review): Thread.Sleep blocks the thread inside an async method — Task.Delay
    // would be preferable; confirm the scheduling model before changing.
    if (worker.Id != 0)
    {
        while (_queue.Count < (worker.Id + 1) && !_cancelSource.Token.IsCancellationRequested && !_aThreadIsComplete)
        {
            Thread.Sleep(100);
        }
    }

    // Keep crawling until any completion condition is met, cancellation is requested,
    // or some thread has flagged the crawl as complete.
    while (job.CompletionConditions.All(cc => !cc.ConditionMet(GetCrawlProgress())) && !_cancelSource.Token.IsCancellationRequested && !_aThreadIsComplete)
    {
        // Only worker 0 reports progress, to avoid duplicate update callbacks.
        if (worker.Id == 0 && NeedsUpdate())
        {
            _updateAction(GetCrawlProgress());
        }

        // set up fallback and retry policies:
        // fallback — if the queue stays empty after all retries, mark the crawl
        // complete (shared flag read by every worker) and yield null.
        var fallback = Policy<Uri>
            .Handle<CrawlQueueEmptyException>()
            .Fallback((cToken) =>
            {
                _aThreadIsComplete = true;
                return (null);
            });

        // retry — up to 10 attempts with linearly increasing backoff (200ms, 400ms, ...).
        var retry = Policy<Uri>
            .Handle<CrawlQueueEmptyException>()
            .WaitAndRetry(10, tryNum => TimeSpan.FromMilliseconds(tryNum * 200));

        // will attempt to get a new item from the queue, retrying as per above policies
        var next = Policy.Wrap(fallback, retry).Execute(() =>
        {
            var n = GetNext();
            if (n == null)
            {
                throw new CrawlQueueEmptyException();
            }
            return (n);
        });

        // fallback will set this if we failed to get a new link (this will end the crawl)
        if (_aThreadIsComplete)
        {
            continue;
        }

        try
        {
            // access it
            var responseTask = _webAgent.ExecuteRequest(next);
            // log that we've crawled it — recorded before awaiting so other workers
            // see it as claimed while the request is still in flight
            _crawled.Add(next);
            var response = await responseTask;
            if (response != null)
            {
                var html = HTMLRetriever.GetHTML(_webAgent.GetCompressedStream(response));
                // parse the contents for new links and data user wants
                var data = DataExtractor.Extract(html, job.Domain, job.Regex);

                // add each of the links extracted if:
                //  - the queue is not too large
                //  - the link is not disallowed by the domain's robots.txt file
                //  - the link is not already in the queue
                //  - the link has not already been crawled
                //  - each of the user defined enqueue conditions returns true
                foreach (var link in data.Links)
                {
                    if (_queue.Count < QUEUE_MAX && RobotParser.UriIsAllowed(_disallowedUrls, link) && !_queue.Contains(link) && !_crawled.Contains(link) && job.EnqueueConditions.All(ec => ec.ConditionMet(link)))
                    {
                        _queue.Enqueue(link);
                    }
                }

                // add data matching the regex to the return list
                foreach (var foundData in data.Data)
                {
                    _results.Add(foundData);
                }
            }
        }
        catch (WebException e)
        {
            // Network failures don't end the crawl; collect them for later inspection.
            _errors.Add(e);
        }
    }

    // First thread to exit the loop marks the whole crawl complete for the others.
    if (!_aThreadIsComplete)
    {
        _aThreadIsComplete = true;
    }

    // Signal the coordinator that this worker has finished.
    worker.DoneEvent.Set();
}