Example #1
0
        /// <summary>
        /// Logs in the current Reddit instance.
        /// </summary>
        /// <remarks>
        /// Replaces the web agent's cookie container with a fresh one, posts the credentials
        /// to the login endpoint and then calls <c>InitOrUpdateUser</c>.
        /// </remarks>
        /// <param name="username">The username of the user to log on to.</param>
        /// <param name="password">The password of the user to log on to.</param>
        /// <param name="useSsl">Whether to use SSL or not. (default: true)</param>
        /// <returns>The value of <c>User</c> after <c>InitOrUpdateUser</c> has run.</returns>
        /// <exception cref="AuthenticationException">
        /// Thrown when executing the login request fails for any reason. The original
        /// failure is attached as the inner exception.
        /// </exception>
        public AuthenticatedUser LogIn(string username, string password, bool useSsl = true)
        {
            // SECURITY NOTE(review): on Mono this installs a certificate validation callback
            // that accepts every certificate. Kept for backward compatibility; consider
            // removing it once the target Mono version ships a usable trust store.
            if (Type.GetType("Mono.Runtime") != null)
            {
                ServicePointManager.ServerCertificateValidationCallback = (s, c, ch, ssl) => true;
            }
            _webAgent.Cookies = new CookieContainer();

            HttpWebRequest request = _webAgent.CreatePost(useSsl ? SslLoginUrl : LoginUrl);

            // The using block closes the request stream even when writing the body throws.
            using (var stream = request.GetRequestStream())
            {
                if (useSsl)
                {
                    _webAgent.WritePostBody(stream, new
                    {
                        user = username,
                        passwd = password,
                        api_type = "json"
                    });
                }
                else
                {
                    // The non-SSL endpoint additionally receives the "op" field.
                    _webAgent.WritePostBody(stream, new
                    {
                        user = username,
                        passwd = password,
                        api_type = "json",
                        op = "login"
                    });
                }
            }

            try
            {
                _webAgent.ExecuteRequest(request);
            }
            catch (Exception ex)
            {
                // Keep the original failure reachable: not every error here is a bad password.
                throw new AuthenticationException("Incorrect login.", ex);
            }
            InitOrUpdateUser();

            return User;
        }
Example #2
0
        /// <summary>
        /// Gets the OAuth token for the user associated with the provided code.
        /// </summary>
        /// <remarks>
        /// On success the access token is stored in <c>OAuthToken</c>; when the response also
        /// carries a refresh token it is stored in <c>RefreshToken</c>.
        /// </remarks>
        /// <param name="code">
        /// Sent by reddit as a parameter in the return uri. For refresh requests this value
        /// is posted as the <c>refresh_token</c> field instead.
        /// </param>
        /// <param name="isRefresh">Set to true for refresh requests.</param>
        /// <returns>The access token contained in the response.</returns>
        /// <exception cref="AuthenticationException">Thrown when the response contains no <c>access_token</c>.</exception>
        public string GetOAuthToken(string code, bool isRefresh = false)
        {
            // SECURITY NOTE(review): on Mono this installs a certificate validation callback
            // that accepts every certificate. Kept for backward compatibility.
            if (Type.GetType("Mono.Runtime") != null)
            {
                ServicePointManager.ServerCertificateValidationCallback = (s, c, ch, ssl) => true;
            }
            _webAgent.Cookies = new CookieContainer();

            var request = _webAgent.CreatePost(AccessUrl);

            request.InitWebReqProxy();

            // UTF-8 instead of Encoding.Default so the header bytes do not depend on the
            // machine's ANSI code page; for ASCII credentials the output is unchanged.
            var credentials = Encoding.UTF8.GetBytes(_clientId + ":" + _clientSecret);
            request.Headers["Authorization"] = "Basic " + Convert.ToBase64String(credentials);

            // The using block closes the request stream even when writing the body throws.
            using (var stream = request.GetRequestStream())
            {
                if (isRefresh)
                {
                    _webAgent.WritePostBody(stream, new
                    {
                        grant_type = "refresh_token",
                        refresh_token = code
                    });
                }
                else
                {
                    _webAgent.WritePostBody(stream, new
                    {
                        grant_type = "authorization_code",
                        code,
                        redirect_uri = _redirectUri
                    });
                }
            }

            var json = _webAgent.ExecuteRequest(request);

            var accessToken = json["access_token"];
            if (accessToken == null)
            {
                throw new AuthenticationException("Could not log in.");
            }

            var refreshToken = json["refresh_token"];
            if (refreshToken != null)
            {
                RefreshToken = refreshToken.ToString();
            }

            var token = accessToken.ToString();
            OAuthToken = token;
            return token;
        }
Example #3
0
        /// <summary>
        /// Downloads <c>http://{domain}/robots.txt</c> and collects the values of the
        /// <c>Disallow</c> lines that apply to every agent (<c>*</c>) or to
        /// <see cref="IWebAgent.AgentName"/>.
        /// </summary>
        /// <param name="webAgent">Agent used to perform the request and to unwrap the response stream.</param>
        /// <param name="domain">Host name whose robots.txt should be read.</param>
        /// <returns>
        /// The lower-cased, trimmed rule values. Empty when there is no response, when the
        /// server answers with a 4xx/5xx status, or when no rule applies.
        /// </returns>
        public static async Task<IEnumerable<string>> GetDisallowedUrls(IWebAgent webAgent, string domain)
        {
            var uri = new Uri($"http://{domain}/robots.txt");
            var list = new List<string>();
            string text;

            using (var response = await webAgent.ExecuteRequest(uri))
            {
                // No response at all is treated like an error response: nothing is disallowed.
                if (response == null)
                {
                    return list;
                }

                // Error responses carry no usable robots.txt. The original "||" made this
                // condition true for every status code, so no rule was ever parsed.
                var status = (int)response.StatusCode;
                if (status >= 400 && status <= 599)
                {
                    return list;
                }

                using (var stream = webAgent.GetCompressedStream(response))
                using (var reader = new StreamReader(stream, Encoding.Default))
                {
                    text = await reader.ReadToEndAsync();
                }
            }

            // Split on both CR and LF so the result does not depend on the host platform's
            // newline, and lower-case with the invariant culture so keyword matching
            // ("disallow", "user-agent") does not break under cultures such as tr-TR.
            var lines = text.ToLowerInvariant().Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            var name = webAgent.AgentName.ToLowerInvariant();
            var applicable = false;

            foreach (var line in lines)
            {
                // Each user-agent line switches whether the following rules apply to us.
                if (line.Contains("user-agent"))
                {
                    applicable = line.Contains("*") || line.Contains(name);
                }

                if (!applicable || !line.Contains("disallow"))
                {
                    continue;
                }

                // Split only on the first ':' so rule values containing a colon stay intact.
                var parts = line.Split(new[] { ':' }, 2);
                if (parts.Length < 2)
                {
                    continue;
                }

                var rule = parts[1].Trim();

                // An empty Disallow value means "nothing is disallowed"; it is not a rule.
                if (rule.Length == 0)
                {
                    continue;
                }

                list.Add(rule);
            }

            return list;
        }
        /// <summary>
        /// Reads the ToolBox usernotes wiki page of a subreddit and decodes every note in it.
        /// </summary>
        /// <param name="webAgent">Agent used to create and execute the wiki request.</param>
        /// <param name="subName">Subreddit name; formatted into <c>ToolBoxUserNotesWiki</c> and copied onto each note.</param>
        /// <returns>One <see cref="TBUserNote"/> per note found in the page's blob.</returns>
        /// <exception cref="ToolBoxUserNotesException">
        /// Thrown when the page reports a version below 6, or when decoding the blob fails
        /// (the cause is attached as the inner exception).
        /// </exception>
        public static IEnumerable<TBUserNote> GetUserNotes(IWebAgent webAgent, string subName)
        {
            var request = webAgent.CreateGet(String.Format(ToolBoxUserNotesWiki, subName));
            var reqResponse = webAgent.ExecuteRequest(request);
            var response = JObject.Parse(reqResponse["data"]["content_md"].Value<string>());

            // Check the version before touching any other field, so an unsupported page is
            // reported as such instead of failing while the lookup tables below are read.
            int version = response["ver"].Value<int>();
            if (version < 6)
            {
                throw new ToolBoxUserNotesException("Unsupported ToolBox version");
            }

            // Lookup tables that the per-note indices ("m" and "w") point into.
            string[] mods = response["constants"]["users"].Values<string>().ToArray();
            string[] warnings = response["constants"]["warnings"].Values<string>().ToArray();

            try
            {
                var data = Convert.FromBase64String(response["blob"].Value<string>());

                string uncompressed;
                using (System.IO.MemoryStream compressedStream = new System.IO.MemoryStream(data))
                {
                    // Skip the first two bytes before inflating (the original author notes
                    // this fixes the zlib block size) - presumably the zlib header, which
                    // DeflateStream does not expect.
                    compressedStream.ReadByte();
                    compressedStream.ReadByte();
                    using (DeflateStream blobStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
                    using (var decompressedReader = new System.IO.StreamReader(blobStream))
                    {
                        uncompressed = decompressedReader.ReadToEnd();
                    }
                }

                JObject users = JObject.Parse(uncompressed);

                List<TBUserNote> toReturn = new List<TBUserNote>();
                foreach (KeyValuePair<string, JToken> user in users)
                {
                    // The key is the username; "ns" holds that user's notes.
                    foreach (JToken note in user.Value["ns"].Children())
                    {
                        TBUserNote uNote = new TBUserNote();
                        uNote.AppliesToUsername = user.Key;
                        uNote.SubName = subName;
                        uNote.SubmitterIndex = note["m"].Value<int>();
                        uNote.Submitter = mods[uNote.SubmitterIndex];
                        uNote.NoteTypeIndex = note["w"].Value<int>();
                        uNote.NoteType = warnings[uNote.NoteTypeIndex];
                        uNote.Message = note["n"].Value<string>();
                        uNote.Timestamp = UnixTimeStamp.UnixTimeStampToDateTime(note["t"].Value<long>());
                        uNote.Url = UnsquashLink(subName, note["l"].ValueOrDefault<string>());

                        toReturn.Add(uNote);
                    }
                }
                return toReturn;
            }
            catch (Exception e)
            {
                throw new ToolBoxUserNotesException("An error occured while processing Usernotes wiki. See inner exception for details", e);
            }
        }
        /// <summary>
        /// Fetches a subreddit's ToolBox usernotes wiki page and turns its compressed blob
        /// into a list of notes.
        /// </summary>
        /// <param name="webAgent">Agent used to create and execute the wiki request.</param>
        /// <param name="subName">Subreddit name; formatted into <c>ToolBoxUserNotesWiki</c> and copied onto each note.</param>
        /// <returns>One <see cref="TBUserNote"/> per note found in the page's blob.</returns>
        /// <exception cref="ToolBoxUserNotesException">
        /// Thrown when the page reports a version below 6, or when decoding the blob fails
        /// (the cause is attached as the inner exception).
        /// </exception>
        public static IEnumerable<TBUserNote> GetUserNotes(IWebAgent webAgent, string subName)
        {
            var request = webAgent.CreateGet(string.Format(ToolBoxUserNotesWiki, subName));
            var reqResponse = webAgent.ExecuteRequest(request);
            var response = JObject.Parse(reqResponse["data"]["content_md"].Value<string>());

            // Reject unsupported pages first; reading "constants" before this check could
            // fail with an unrelated exception and hide the real reason.
            int version = response["ver"].Value<int>();
            if (version < 6)
            {
                throw new ToolBoxUserNotesException("Unsupported ToolBox version");
            }

            // Tables resolved by the "m" (submitter) and "w" (note type) indices of a note.
            string[] mods = response["constants"]["users"].Values<string>().ToArray();
            string[] warnings = response["constants"]["warnings"].Values<string>().ToArray();

            try
            {
                var data = Convert.FromBase64String(response["blob"].Value<string>());

                string uncompressed;
                using (System.IO.MemoryStream compressedStream = new System.IO.MemoryStream(data))
                {
                    // Two bytes are consumed before inflating (per the original author, to
                    // fix the zlib block size) - presumably the zlib header.
                    compressedStream.ReadByte();
                    compressedStream.ReadByte();
                    using (DeflateStream blobStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
                    using (var decompressedReader = new System.IO.StreamReader(blobStream))
                    {
                        uncompressed = decompressedReader.ReadToEnd();
                    }
                }

                JObject users = JObject.Parse(uncompressed);

                List<TBUserNote> toReturn = new List<TBUserNote>();
                foreach (KeyValuePair<string, JToken> user in users)
                {
                    // Key: username. Value: object whose "ns" member lists the notes.
                    foreach (JToken note in user.Value["ns"].Children())
                    {
                        TBUserNote uNote = new TBUserNote();
                        uNote.AppliesToUsername = user.Key;
                        uNote.SubName = subName;
                        uNote.SubmitterIndex = note["m"].Value<int>();
                        uNote.Submitter = mods[uNote.SubmitterIndex];
                        uNote.NoteTypeIndex = note["w"].Value<int>();
                        uNote.NoteType = warnings[uNote.NoteTypeIndex];
                        uNote.Message = note["n"].Value<string>();
                        uNote.Timestamp = UnixTimeStamp.UnixTimeStampToDateTime(note["t"].Value<long>());
                        uNote.Url = UnsquashLink(subName, note["l"].ValueOrDefault<string>());

                        toReturn.Add(uNote);
                    }
                }
                return toReturn;
            }
            catch (Exception e)
            {
                throw new ToolBoxUserNotesException("An error occured while processing Usernotes wiki. See inner exception for details", e);
            }
        }
Example #6
0
        /// <summary>
        /// Crawl loop executed by a single worker: repeatedly takes the next link, requests
        /// it, enqueues the links found in the response and records data matching the job.
        /// </summary>
        /// <remarks>
        /// The loop ends when any completion condition of the job is met, cancellation is
        /// requested, or <c>_aThreadIsComplete</c> has been set (by this or another worker).
        /// On exit the flag is set and the worker's <c>DoneEvent</c> is signalled.
        /// </remarks>
        /// <param name="worker">Worker running this loop; the worker with Id 0 also reports progress.</param>
        /// <param name="job">Supplies the completion conditions, enqueue conditions, domain and regex.</param>
        /// <returns>A task that completes when this worker has left the crawl loop.</returns>
        private async Task ThreadAction(IWorker worker, CrawlJob job)
        {
            // Holding pattern: every worker except 0 waits until the queue holds more than
            // worker.Id items. Task.Delay keeps the wait from blocking a thread, unlike
            // Thread.Sleep inside an async method.
            if (worker.Id != 0)
            {
                while (_queue.Count < (worker.Id + 1) && !_cancelSource.Token.IsCancellationRequested && !_aThreadIsComplete)
                {
                    await Task.Delay(100);
                }
            }

            // Fallback and retry policies do not change between iterations, so they are
            // built once. Up to 10 retries with a growing delay (200 ms * attempt); when all
            // fail, the fallback flags the crawl as complete and yields no link.
            // NOTE(review): WaitAndRetry is the synchronous Polly policy and presumably blocks
            // while waiting; moving to the async policies would be a larger change.
            var fallback = Policy<Uri>.Handle<CrawlQueueEmptyException>()
                           .Fallback((cToken) =>
            {
                _aThreadIsComplete = true;
                return null;
            });

            var retry = Policy<Uri>.Handle<CrawlQueueEmptyException>()
                        .WaitAndRetry(10, tryNum => TimeSpan.FromMilliseconds(tryNum * 200));

            var dequeuePolicy = Policy.Wrap(fallback, retry);

            while (job.CompletionConditions.All(cc => !cc.ConditionMet(GetCrawlProgress())) &&
                   !_cancelSource.Token.IsCancellationRequested &&
                   !_aThreadIsComplete)
            {
                if (worker.Id == 0 && NeedsUpdate())
                {
                    _updateAction(GetCrawlProgress());
                }

                // Attempt to get a new item from the queue, retrying as per the policies above.
                var next = dequeuePolicy.Execute(() =>
                {
                    var candidate = GetNext();

                    if (candidate == null)
                    {
                        throw new CrawlQueueEmptyException();
                    }

                    return candidate;
                });

                // The fallback sets this when no link could be obtained; that ends the crawl.
                if (_aThreadIsComplete)
                {
                    break;
                }

                try
                {
                    // Start the request, and log the link as crawled while it is in flight.
                    var responseTask = _webAgent.ExecuteRequest(next);
                    _crawled.Add(next);

                    // Dispose the response once its content has been processed.
                    using (var response = await responseTask)
                    {
                        if (response != null)
                        {
                            var html = HTMLRetriever.GetHTML(_webAgent.GetCompressedStream(response));

                            // Parse the contents for new links and the data the user wants.
                            var data = DataExtractor.Extract(html, job.Domain, job.Regex);

                            // Add each of the links extracted if:
                            // the queue is not too large
                            // the link is not disallowed by the domain's robots.txt file
                            // the link is not already in the queue
                            // the link has not already been crawled
                            // each of the user defined enqueue conditions returns true
                            foreach (var link in data.Links)
                            {
                                if (_queue.Count < QUEUE_MAX &&
                                    RobotParser.UriIsAllowed(_disallowedUrls, link) &&
                                    !_queue.Contains(link) &&
                                    !_crawled.Contains(link) &&
                                    job.EnqueueConditions.All(ec => ec.ConditionMet(link)))
                                {
                                    _queue.Enqueue(link);
                                }
                            }

                            // Add data matching the regex to the return list.
                            foreach (var foundData in data.Data)
                            {
                                _results.Add(foundData);
                            }
                        }
                    }
                }
                catch (WebException e)
                {
                    // A failed request is recorded and the crawl moves on to the next link.
                    _errors.Add(e);
                }
            }

            // Tell the other workers to stop, then signal that this worker is done.
            _aThreadIsComplete = true;

            worker.DoneEvent.Set();
        }